Date: (Fri) Nov 20, 2015
Data: Source: Training: https://www.kaggle.com/c/15-071x-the-analytics-edge-competition-spring-2015/download/NYTimesBlogTrain.csv
New: https://www.kaggle.com/c/15-071x-the-analytics-edge-competition-spring-2015/download/NYTimesBlogTest.csv
Time period:
Based on analysis utilizing <> techniques,
Summary of key steps & error improvement stats:
Use plot.ly for interactive plots ?
varImp for randomForest crashes in caret version:6.0.41 -> submit bug report
extensions toward multiclass classification are scheduled for the next release
glm_dmy_mdl should use the same method as glm_sel_mdl until a custom dummy classifier is implemented
# Session setup. Order matters: the workspace is cleared before anything else
# is defined, and the helper scripts are sourced afterwards.
# Remove every object from the current environment.
rm(list = ls())
# Fix the random number generator seed.
set.seed(12345)
# Keep strings as character (not factor) when data frames are built.
options(stringsAsFactors = FALSE)
# Project helper scripts; their contents are not visible in this file.
source("~/Dropbox/datascience/R/myscript.R")
source("~/Dropbox/datascience/R/mydsutils.R")
## Loading required package: caret
## Loading required package: lattice
## Loading required package: ggplot2
source("~/Dropbox/datascience/R/myplot.R")
source("~/Dropbox/datascience/R/mypetrinet.R")
source("~/Dropbox/datascience/R/myplclust.R")
source("~/Dropbox/datascience/R/mytm.R")
# Gather all package requirements here
# Parallel backend and modelling framework.
# library() is used instead of require(): require() only returns FALSE (with a
# warning) when a package is missing, so the script would carry on and fail
# later at the first call into that package. library() stops right here.
suppressPackageStartupMessages(library(doMC))
registerDoMC(6) # # of cores on machine - 2
suppressPackageStartupMessages(library(caret))
#source("dbgcaret.R")
#packageVersion("snow")
#require(sos); findFn("cosine", maxPages=2, sortby="MaxScore")
# Analysis control global variables
# Inputs
# Locations of the two input files: the training observations and the "new"
# observations.
glb_trnng_url <- "https://www.kaggle.com/c/15-071x-the-analytics-edge-competition-spring-2015/download/NYTimesBlogTrain.csv"
glb_newdt_url <- "https://www.kaggle.com/c/15-071x-the-analytics-edge-competition-spring-2015/download/NYTimesBlogTest.csv"
glbInpMerge <- NULL #: default
# list(fnames = c("<fname1>", "<fname2>")) # files will be concatenated
# NOTE(review): presumably the glb_split_* settings below only matter when this
# flag is FALSE - confirm against the script that consumes them.
glb_is_separate_newobs_dataset <- TRUE # or FALSE
glb_split_entity_newobs_datasets <- FALSE # select from c(FALSE, TRUE)
glb_split_newdata_method <- NULL # select from c(NULL, "condition", "sample", "copy")
glb_split_newdata_condition <- NULL # or "is.na(<var>)"; "<var> <condition_operator> <value>"
glb_split_newdata_size_ratio <- 0.3 # > 0 & < 1
glb_split_sample.seed <- 123 # or any integer
glbObsDropCondition <- NULL # : default
# "<condition>" # use | & ; NOT || &&
#parse(text=glbObsDropCondition)
#subset(glbObsAll, .grpid %in% c(31))
glb_obs_repartition_train_condition <- NULL # : default
# "<condition>"
glb_max_fitobs <- NULL # or any integer
# Problem type: classification is defined as the complement of regression.
glb_is_regression <- FALSE; glb_is_classification <- !glb_is_regression;
glb_is_binomial <- TRUE # or FALSE
# Name of the response column as it appears in the raw data.
glb_rsp_var_raw <- "Popular"
# for classification, the response variable has to be a factor
glb_rsp_var <- "Popular.fctr" # glb_rsp_var_raw # or "Popular.fctr"
# if the response factor is based on numbers/logicals e.g (0/1 OR TRUE/FALSE vs. "A"/"B"),
# or contains spaces (e.g. "Not in Labor Force")
# caret predict(..., type="prob") crashes
# Map the raw response to the modelling response.
#
# Args:
#   raw: vector of raw response values; each non-missing value is compared
#        against 1.
# Returns:
#   A factor of the same length with levels c("N", "Y") ("N" is the first,
#   i.e. reference, level): "Y" where raw == 1, "N" for any other non-missing
#   value, NA where raw is NA.
#
# The levels are declared explicitly instead of being derived from the data.
# relevel(as.factor(x), ref = "N") stops with "'ref' must be an existing level"
# whenever the input contains no "N" at all (all 1s, all NA, or zero length).
glb_map_rsp_raw_to_var <- #NULL
    function(raw) {
        # Alternative (regression) transforms:
        # return(raw ^ 0.5)
        # return(log(1 + raw))
        # return(log10(raw))
        # return(exp(-raw / 2))
        ret_vals <- rep_len(NA_character_, length(raw))
        is_obs <- !is.na(raw)
        ret_vals[is_obs] <- ifelse(raw[is_obs] == 1, "Y", "N")
        factor(ret_vals, levels = c("N", "Y"))
        # #as.factor(paste0("B", raw))
        # #as.factor(gsub(" ", "\\.", raw))
    }
# glb_map_rsp_raw_to_var(tst <- c(NA, 0, 1))
# glb_map_rsp_raw_to_var(tst <- c(NA, 0, 2.99, 280.50, 1000.00))
# Map the modelling response back to the raw scale.
#
# Args:
#   var: response values; as.numeric() is applied, which for a factor yields
#        the position of each value's level.
# Returns:
#   Numeric vector: as.numeric(var) shifted down by one, so the first level
#   becomes 0 and the second becomes 1.
glb_map_rsp_var_to_raw <- #NULL
    function(var) {
        # Alternative (regression) inverse transforms:
        # return(var ^ 2.0)
        # return(exp(var))
        # return(10 ^ var)
        # return(-log(var) * 2)
        level_pos <- as.numeric(var)
        return(level_pos - 1)
        # gsub("\\.", " ", levels(var)[as.numeric(var)])
        # c("<=50K", " >50K")[as.numeric(var)]
        # c(FALSE, TRUE)[as.numeric(var)]
    }
# glb_map_rsp_var_to_raw(glb_map_rsp_raw_to_var(tst))
# A response variable that differs from the raw one cannot be built without a
# raw -> response mapping function, so stop early when it is missing.
if ((glb_rsp_var != glb_rsp_var_raw) && is.null(glb_map_rsp_raw_to_var)) {
    stop("glb_map_rsp_raw_to_var function expected")
}
# List info gathered for various columns
# <col_name>: <description>; <notes>
# NewsDesk = the New York Times desk that produced the story (Business, Culture, Foreign, etc.)
# SectionName = the section the article appeared in (Opinion, Arts, Technology, etc.)
# SubsectionName = the subsection the article appeared in (Education, Small Business, Room for Debate, etc.)
# Headline = the title of the article
# Snippet = a small portion of the article text
# Abstract = a summary of the blog article, written by the New York Times
# WordCount = the number of words in the article
# PubDate = the publication date, in the format "Year-Month-Day Hour:Minute:Second"
# UniqueID = a unique identifier for each article
# If multiple vars are parts of id, consider concatenating them to create one id var
# If glb_id_var == NULL, ".rownames <- row.names()" is the default
# User-specified exclusions
# Features listed here are excluded by the user. Candidate reasons for listing
# a feature are kept as comments next to the entries.
glbFeatsExclude <- c(
    # Feats that shd be excluded due to known causation by prediction variable
    #   "<feat1", "<feat2>"
    # Feats that are linear combinations (alias in glm)
    # Feature-engineering phase -> start by excluding all features except id & category & work each one in
    "NewsDesk", "SectionName", "SubsectionName",
    "WordCount", "PubDate",
    # Feature Engineering done with prior features
    "Headline", "Snippet", "Abstract"
)
# When the modelling response is a separate column, the raw response column is
# added to the exclusions as well.
if (glb_rsp_var_raw != glb_rsp_var) {
    glbFeatsExclude <- union(glbFeatsExclude, glb_rsp_var_raw)
}
# Features meant to enter models only through interactions; empty here.
glbFeatsInteractionOnly <- list()
#glbFeatsInteractionOnly[["carrier.fctr"]] <- "cellular.fctr"
# currently does not handle more than 1 column; consider concatenating multiple columns
# Column holding the unique observation id.
glb_id_var <- "UniqueID" # choose from c(NULL : default, "<id_feat>")
# NOTE(review): "NDSSName.my.fctr" looks like a factor version of the derived
# feature "NDSSName.my" defined below - confirm where the ".fctr" column is created.
glbFeatsCategory <- "NDSSName.my.fctr" # choose from c(NULL : default, "<category>")
# Evaluates to NULL as written (nothing listed).
glb_drop_vars <- c(NULL
# , "<feat1>", "<feat2>"
)
glb_map_vars <- NULL # or c("<var1>", "<var2>")
glb_map_urls <- list();
# glb_map_urls[["<var1>"]] <- "<var1.url>"
glb_assign_pairs_lst <- NULL;
# glb_assign_pairs_lst[["<var1>"]] <- list(from=c(NA),
# to=c("NA.my"))
# names(NULL) is NULL, so this stays NULL until glb_assign_pairs_lst is populated.
glb_assign_vars <- names(glb_assign_pairs_lst)
# Derived features; Use this mechanism to cleanse data ??? Cons: Data duplication ???
# Each entry is a list(mapfn = <function>, args = <names of its input columns>).
glbFeatsDerive <- list();
# glbFeatsDerive[["<feat.my.sfx>"]] <- list(
# mapfn = function(<arg1>, <arg2>) { return(function(<arg1>, <arg2>)) }
# , args = c("<arg1>", "<arg2>"))
# character
# mapfn = function(Week) { return(substr(Week, 1, 10)) }
# mapfn = function(descriptor) { return(plyr::revalue(descriptor, c(
# "ABANDONED BUILDING" = "OTHER",
# "**" = "**"
# ))) }
# Derived feature "NDSSName.my": one label built from NewsDesk, SectionName and
# SubsectionName. The three values are joined with "#", all spaces are removed,
# and the resulting string is recoded through the table below; strings that do
# not appear as a name in the table are left as they are.
glbFeatsDerive[["NDSSName.my"]] <- list(
    mapfn = function(NewsDesk, SectionName, SubsectionName) {
        # Recode table: name = joined string to look for, value = replacement.
        ndss_recode <- c(
            "#BusinessDay#Dealbook" = "Business#BusinessDay#Dealbook",
            "#BusinessDay#SmallBusiness" = "Business#BusinessDay#SmallBusiness",
            "#Crosswords/Games#" = "Business#Crosswords/Games#",
            "#Open#" = "Business#Technology#",
            "#Technology#" = "Business#Technology#",
            "Business##" = "Business#Technology#",
            "#Arts#" = "Culture#Arts#",
            "Foreign##" = "Foreign#World#",
            "#World#AsiaPacific" = "Foreign#World#AsiaPacific",
            "#N.Y./Region#" = "Metro#N.Y./Region#",
            "#Opinion#" = "OpEd#Opinion#",
            "OpEd##" = "OpEd#Opinion#",
            "#Health#" = "Science#Health#",
            "Science##" = "Science#Health#",
            "Styles#Health#" = "Science#Health#",
            "Styles##" = "Styles##Fashion",
            "Styles#Style#Fashion&Style" = "Styles##Fashion",
            "#Travel#" = "Travel#Travel#",
            "Magazine#Magazine#" = "myOther",
            "National##" = "myOther",
            "National#U.S.#Politics" = "myOther",
            "Sports##" = "myOther",
            "Sports#Sports#" = "myOther",
            "#U.S.#" = "myOther"
        )
        joined_key <- paste(NewsDesk, SectionName, SubsectionName, sep = "#")
        plyr::revalue(gsub(" ", "", joined_key), ndss_recode)
    },
    args = c("NewsDesk", "SectionName", "SubsectionName")
)
# mapfn = function(description) { mod_raw <- description;
# This is here because it does not work if it's in txt_map_filename
# mod_raw <- gsub(paste0(c("\n", "\211", "\235", "\317", "\333"), collapse = "|"), " ", mod_raw)
# Don't parse for "." because of ".com"; use customized gsub for that text
# mod_raw <- gsub("(\\w)(!|\\*|,|-|/)(\\w)", "\\1\\2 \\3", mod_raw);
# return(mod_raw) }
#print(mod_raw <- grep(""", glbObsAll[, txt_var], value = TRUE))
#print(mod_raw <- glbObsAll[c(88,187,280,1040,1098), txt_var])
#print(mod_raw <- glbObsAll[sel_obs(list(descr.my.contains="\\bdoes( +)not\\b")), glbFeatsText])
#print(mod_raw <- glbObsAll[sel_obs(list(descr.my.contains="\\bipad [[:digit:]]\\b")), glbFeatsText][01:10])
#print(mod_raw <- glbObsAll[sel_obs(list(descr.my.contains="pad mini")), glbFeatsText][11:20])
#print(mod_raw <- glbObsAll[sel_obs(list(descr.my.contains="pad mini")), glbFeatsText][21:30])
#print(mod_raw <- glbObsAll[sel_obs(list(descr.my.contains="pad mini")), glbFeatsText][31:40])
#glbObsAll[which(glb_post_stop_words_terms_mtrx_lst[[txt_var]][, subset(glb_post_stop_words_terms_df_lst[[txt_var]], term %in% c("conditionminimal"))$pos] > 0), "description"]
# numeric
# Create feature based on record position/id in data
# glbFeatsDerive[["dummy.my"]] <- list(
# mapfn = function(UniqueID) { return(UniqueID) }
# , args = c("UniqueID"))
# Add logs of numerics that are not distributed normally
# Derive & keep multiple transformations of the same feature, if normality is hard to achieve with just one transformation
# Right skew: logp1; sqrt; ^ 1/3; logp1(logp1); log10; exp(-<feat>/constant)
# Three transformations of WordCount, each registered as its own derived
# feature: log(1 + x), square root (x ^ (1/2)) and exp(-x).
glbFeatsDerive[["WordCount.log1p"]] <- list(
    mapfn = function(WordCount) {
        log1p(WordCount)
    },
    args = "WordCount"
)
glbFeatsDerive[["WordCount.root2"]] <- list(
    mapfn = function(WordCount) {
        WordCount ^ (1/2)
    },
    args = "WordCount"
)
glbFeatsDerive[["WordCount.nexp"]] <- list(
    mapfn = function(WordCount) {
        exp(-WordCount)
    },
    args = "WordCount"
)
#print(summary(glbObsAll$WordCount))
#print(summary(mapfn(glbObsAll$WordCount)))
# mapfn = function(Rasmussen) { return(ifelse(sign(Rasmussen) >= 0, 1, 0)) }
# mapfn = function(startprice) { return(startprice ^ (1/2)) }
# mapfn = function(startprice) { return(log(startprice)) }
# mapfn = function(startprice) { return(exp(-startprice / 20)) }
# mapfn = function(startprice) { return(scale(log(startprice))) }
# mapfn = function(startprice) { return(sign(sprice.predict.diff) * (abs(sprice.predict.diff) ^ (1/10))) }
# factor
# mapfn = function(PropR) { return(as.factor(ifelse(PropR >= 0.5, "Y", "N"))) }
# mapfn = function(productline, description) { as.factor(gsub(" ", "", productline)) }
# mapfn = function(purpose) { return(relevel(as.factor(purpose), ref="all_other")) }
# mapfn = function(raw) { tfr_raw <- as.character(cut(raw, 5));
# tfr_raw[is.na(tfr_raw)] <- "NA.my";
# return(as.factor(tfr_raw)) }
# mapfn = function(startprice.log10) { return(cut(startprice.log10, 3)) }
# mapfn = function(startprice.log10) { return(cut(sprice.predict.diff, c(-1000, -100, -10, -1, 0, 1, 10, 100, 1000))) }
# , args = c("<arg1>"))
# multiple args
# mapfn = function(PTS, oppPTS) { return(PTS - oppPTS) }
# mapfn = function(startprice.log10.predict, startprice) {
# return(spdiff <- (10 ^ startprice.log10.predict) - startprice) }
# mapfn = function(productline, description) { as.factor(
# paste(gsub(" ", "", productline), as.numeric(nchar(description) > 0), sep = "*")) }
# # If glbObsAll is not sorted in the desired manner
# mapfn=function(Week) { return(coredata(lag(zoo(orderBy(~Week, glbObsAll)$ILI), -2, na.pad=TRUE))) }
# mapfn=function(ILI) { return(coredata(lag(zoo(ILI), -2, na.pad=TRUE))) }
# mapfn=function(ILI.2.lag) { return(log(ILI.2.lag)) }
# glbFeatsDerive[["<var1>"]] <- glbFeatsDerive[["<var2>"]]
# Names of all derived features registered in glbFeatsDerive.
glb_derive_vars <- names(glbFeatsDerive)
# tst <- "descr.my"; args_lst <- NULL; for (arg in glbFeatsDerive[[tst]]$args) args_lst[[arg]] <- glbObsAll[, arg]; print(head(args_lst[[arg]])); print(head(drv_vals <- do.call(glbFeatsDerive[[tst]]$mapfn, args_lst)));
# print(which_ix <- which(args_lst[[arg]] == 0.75)); print(drv_vals[which_ix]);
# Date/time features: one entry per column, giving its parse format and timezone.
glbFeatsDateTime <- list()
# NOTE(review): c() coerces mixed character/logical values to character, so
# impute.na is stored as the string "FALSE", not the logical FALSE - confirm
# the consumer handles that.
glbFeatsDateTime[["PubDate"]] <-
c(format = "%Y-%m-%d %H:%M:%S", timezone = "America/New_York", impute.na = FALSE)
glbFeatsPrice <- NULL # or c("<price_var>")
# No text features are configured; the text settings that size themselves by
# length(glbFeatsText) are therefore empty.
glbFeatsText <- NULL # c("<txt_var>") # NULL #
# Switch every locale category to the "C" locale.
Sys.setlocale("LC_ALL", "C") # For english
## [1] "C/C/C/C/C/en_US.UTF-8"
# Text Processing Step: custom modifications not present in txt_munge -> use glbFeatsDerive
# Text Processing Step: universal modifications
# File-name prefix for the text-munging step.
# NOTE(review): how the prefix is expanded into file names is not visible here.
glb_txt_munge_filenames_pfx <- "NYTBlogs3_mytxt_"
# Text Processing Step: tolower
# Text Processing Step: myreplacePunctuation
# Text Processing Step: removeWords
# Custom stop words, keyed by text variable name.
glb_txt_stop_words <- list()
# Remember to use unstemmed words
if (!is.null(glbFeatsText)) {
    # library() instead of require(): a missing tm package must stop the
    # script here rather than return FALSE and fail later in text processing.
    library(tm)
    glb_txt_stop_words[["<txt_var>"]] <- sort(c(NULL
    # Remove any words from stopwords
    # , setdiff(myreplacePunctuation(stopwords("english")), c("<keep_wrd1>", <keep_wrd2>"))
    # cor.y.train == NA
    # ,unlist(strsplit(paste(c(NULL
    # ,"<comma-separated-terms>"
    # ), collapse=",")
    # freq == 1; keep c("<comma-separated-terms-to-keep>")
    # ,<comma-separated-terms>
    # chisq.pval high (e.g. == 1); keep c("<comma-separated-terms-to-keep>")
    # ,<comma-separated-terms>
    # nzv.freqRatio high (e.g. >= glb_nzv_freqCut); keep c("<comma-separated-terms-to-keep>")
    # ,<comma-separated-terms>
    ))
}
#orderBy(~term, glb_post_stem_words_terms_df_lst[[txt_var]][grep("^2", glb_post_stem_words_terms_df_lst[[txt_var]]$term), ])
#glbObsAll[glb_post_stem_words_terms_mtrx_lst[[txt_var]][, 6] > 0, glbFeatsText]
# To identify terms with a specific freq
#paste0(sort(subset(glb_post_stop_words_terms_df_lst[[txt_var]], freq == 1)$term), collapse = ",")
#paste0(sort(subset(glb_post_stem_words_terms_df_lst[[txt_var]], freq <= 2)$term), collapse = ",")
# To identify terms with a specific freq &
# are not stemmed together later OR is value of color.fctr (e.g. gold)
#paste0(sort(subset(glb_post_stop_words_terms_df_lst[[txt_var]], (freq == 1) & !(term %in% c("blacked","blemish","blocked","blocks","buying","cables","careful","carefully","changed","changing","chargers","cleanly","cleared","connect","connects","connected","contains","cosmetics","default","defaulting","defective","definitely","describe","described","devices","displays","drop","drops","engravement","excellant","excellently","feels","fix","flawlessly","frame","framing","gentle","gold","guarantee","guarantees","handled","handling","having","install","iphone","iphones","keeped","keeps","known","lights","line","lining","liquid","liquidation","looking","lots","manuals","manufacture","minis","most","mostly","network","networks","noted","opening","operated","performance","performs","person","personalized","photograph","physically","placed","places","powering","pre","previously","products","protection","purchasing","returned","rotate","rotation","running","sales","second","seconds","shipped","shuts","sides","skin","skinned","sticker","storing","thats","theres","touching","unusable","update","updates","upgrade","weeks","wrapped","verified","verify") ))$term), collapse = ",")
#print(subset(glb_post_stem_words_terms_df_lst[[txt_var]], (freq <= 2)))
#glbObsAll[which(terms_mtrx[, 229] > 0), glbFeatsText]
# To identify terms with cor.y == NA
#orderBy(~-freq+term, subset(glb_post_stop_words_terms_df_lst[[txt_var]], is.na(cor.y)))
#paste(sort(subset(glb_post_stop_words_terms_df_lst[[txt_var]], is.na(cor.y))[, "term"]), collapse=",")
#orderBy(~-freq+term, subset(glb_post_stem_words_terms_df_lst[[txt_var]], is.na(cor.y)))
# To identify terms with low cor.y.abs
#head(orderBy(~cor.y.abs+freq+term, subset(glb_post_stem_words_terms_df_lst[[txt_var]], !is.na(cor.y))), 5)
# To identify terms with high chisq.pval
#subset(glb_post_stem_words_terms_df_lst[[txt_var]], chisq.pval > 0.99)
#paste0(sort(subset(glb_post_stem_words_terms_df_lst[[txt_var]], (chisq.pval > 0.99) & (freq <= 10))$term), collapse=",")
#paste0(sort(subset(glb_post_stem_words_terms_df_lst[[txt_var]], (chisq.pval > 0.9))$term), collapse=",")
#head(orderBy(~-chisq.pval+freq+term, glb_post_stem_words_terms_df_lst[[txt_var]]), 5)
#glbObsAll[glb_post_stem_words_terms_mtrx_lst[[txt_var]][, 68] > 0, glbFeatsText]
#orderBy(~term, glb_post_stem_words_terms_df_lst[[txt_var]][grep("^m", glb_post_stem_words_terms_df_lst[[txt_var]]$term), ])
# To identify terms with high nzv.freqRatio
#summary(glb_post_stem_words_terms_df_lst[[txt_var]]$nzv.freqRatio)
#paste0(sort(setdiff(subset(glb_post_stem_words_terms_df_lst[[txt_var]], (nzv.freqRatio >= glb_nzv_freqCut) & (freq < 10) & (chisq.pval >= 0.05))$term, c( "128gb","3g","4g","gold","ipad1","ipad3","ipad4","ipadair2","ipadmini2","manufactur","spacegray","sprint","tmobil","verizon","wifion"))), collapse=",")
# To identify obs with a txt term
#tail(orderBy(~-freq+term, glb_post_stop_words_terms_df_lst[[txt_var]]), 20)
#mydspObs(list(descr.my.contains="non"), cols=c("color", "carrier", "cellular", "storage"))
#grep("ever", dimnames(terms_stop_mtrx)$Terms)
#which(terms_stop_mtrx[, grep("ipad", dimnames(terms_stop_mtrx)$Terms)] > 0)
#glbObsAll[which(terms_stop_mtrx[, grep("16", dimnames(terms_stop_mtrx)$Terms)[1]] > 0), c(glbFeatsCategory, "storage", txt_var)]
# To identify whether terms shd be synonyms
#orderBy(~term, glb_post_stop_words_terms_df_lst[[txt_var]][grep("^moder", glb_post_stop_words_terms_df_lst[[txt_var]]$term), ])
# term_row_df <- glb_post_stop_words_terms_df_lst[[txt_var]][grep("^came$", glb_post_stop_words_terms_df_lst[[txt_var]]$term), ]
#
# cor(glb_post_stop_words_terms_mtrx_lst[[txt_var]][glbObsAll$.lcn == "Fit", term_row_df$pos], glbObsTrn[, glb_rsp_var], use="pairwise.complete.obs")
# To identify which stopped words are "close" to a txt term
#sort(cluster_vars)
# Text Processing Step: stemDocument
# To identify stemmed txt terms
#glb_post_stop_words_terms_df_lst[[txt_var]][grep("condit", glb_post_stop_words_terms_df_lst[[txt_var]]$term), ]
#orderBy(~term, glb_post_stem_words_terms_df_lst[[txt_var]][grep("^con", glb_post_stem_words_terms_df_lst[[txt_var]]$term), ])
#glbObsAll[which(terms_stem_mtrx[, grep("use", dimnames(terms_stem_mtrx)$Terms)[[1]]] > 0), c(glb_id_var, "productline", txt_var)]
#glbObsAll[which(TfIdf_stem_mtrx[, 191] > 0), c(glb_id_var, glbFeatsCategory, txt_var)]
#which(glbObsAll$UniqueID %in% c(11915, 11926, 12198))
# Text Processing Step: mycombineSynonyms
# To identify which terms are associated with not -> combine "could not" & "couldn't"
#findAssocs(glb_full_DTM_lst[[txt_var]], "not", 0.05)
# To identify which synonyms should be combined
#orderBy(~term, glb_post_stem_words_terms_df_lst[[txt_var]][grep("^c", glb_post_stem_words_terms_df_lst[[txt_var]]$term), ])
# Interactive check for a proposed synonym merge.
#
# Args:
#   syn_lst: list with elements `word` (the term the synonyms collapse into)
#            and `syns` (the terms to be collapsed).
# Prints the rows of the post-stemming terms table for `syn_lst$syns`, then
# the row for `syn_lst$word` in the terms of the corpus after
# mycombineSynonyms has been applied to it.
# Reads `txt_var` and the glb_* text objects from the calling environment.
chk_comb_cor <- function(syn_lst) {
    # cor(terms_stem_mtrx[glbObsAll$.src == "Train", grep("^(damag|dent|ding)$", dimnames(terms_stem_mtrx)[[2]])], glbObsTrn[, glb_rsp_var], use="pairwise.complete.obs")
    stem_terms_df <- glb_post_stem_words_terms_df_lst[[txt_var]]
    print(subset(stem_terms_df, term %in% syn_lst$syns))
    combined_corpus <- tm_map(glb_txt_corpus_lst[[txt_var]], mycombineSynonyms, list(syn_lst), lazy=FALSE)
    combined_terms_df <- get_corpus_terms(combined_corpus)
    print(subset(combined_terms_df, term == syn_lst$word))
    # cor(terms_stop_mtrx[glbObsAll$.src == "Train", grep("^(damage|dent|ding)$", dimnames(terms_stop_mtrx)[[2]])], glbObsTrn[, glb_rsp_var], use="pairwise.complete.obs")
    # cor(rowSums(terms_stop_mtrx[glbObsAll$.src == "Train", grep("^(damage|dent|ding)$", dimnames(terms_stop_mtrx)[[2]])]), glbObsTrn[, glb_rsp_var], use="pairwise.complete.obs")
}
#chk_comb_cor(syn_lst=list(word="cabl", syns=c("cabl", "cord")))
#chk_comb_cor(syn_lst=list(word="damag", syns=c("damag", "dent", "ding")))
#chk_comb_cor(syn_lst=list(word="dent", syns=c("dent", "ding")))
#chk_comb_cor(syn_lst=list(word="use", syns=c("use", "usag")))
# Synonym groups per text variable; empty here.
glb_txt_synonyms <- list()
#glb_txt_synonyms[["<txt_var>"]] <- list(NULL
# , list(word="<stem1>", syns=c("<stem1>", "<stem1_2>"))
# )
# options include: "weightTf", "myweightTflog1p", "myweightTfsqrt", "weightTfIdf", "weightBM25"
# Term-matrix controls: weighting scheme, global frequency bounds and
# permitted word lengths.
glb_txt_terms_control <- list(weighting = "weightTfIdf" # : default
# termFreq selection criteria across obs: tm default: list(global=c(1, Inf))
, bounds = list(global = c(1, Inf))
# wordLengths selection criteria: tm default: c(3, Inf)
, wordLengths = c(1, Inf)
)
glb_txt_cor_var <- glb_rsp_var # : default # or c(<feat>)
# select one from c("union.top.val.cor", "top.cor", "top.val", default: "top.chisq", "sparse")
glbFeatsTextFilter <- "top.chisq"
# One value per text variable, named by that variable; zero-length while
# glbFeatsText is NULL.
glbFeatsTextTermsMax <- rep(10, length(glbFeatsText)) # :default
names(glbFeatsTextTermsMax) <- glbFeatsText
# Text Processing Step: extractAssoc
# Same per-text-variable layout as glbFeatsTextTermsMax.
glbFeatsTextAssocCor <- rep(1, length(glbFeatsText)) # :default
names(glbFeatsTextAssocCor) <- glbFeatsText
# Remember to use stemmed terms
glb_important_terms <- list()
# Text Processing Step: extractPatterns (ngrams)
glbFeatsTextPatterns <- list()
#glbFeatsTextPatterns[[<txt_var>]] <- list()
#glbFeatsTextPatterns[[<txt_var>]] <- c(metropolitan.diary.colon = "Metropolitan Diary:")
# Have to set it even if it is not used
# Properties:
# numrows(glb_feats_df) << numrows(glbObsFit
# Select terms that appear in at least 0.2 * O(FP/FN(glbObsOOB)) ???
# numrows(glbObsOOB) = 1.1 * numrows(glbObsNew) ???
glb_sprs_thresholds <- NULL # or c(<txt_var1> = 0.988, <txt_var2> = 0.970, <txt_var3> = 0.970)
# Raised by one from the template default of 20.
glbFctrMaxUniqVals <- 21 # default: 20
glb_impute_na_data <- TRUE # or FALSE
glb_mice_complete.seed <- 144 # or any integer
glb_cluster <- FALSE # : default or TRUE
glb_cluster.seed <- 189 # or any integer
glb_cluster_entropy_var <- glb_rsp_var # c(glb_rsp_var, as.factor(cut(glb_rsp_var, 3)), default: NULL)
glbFeatsTextClusterVarsExclude <- FALSE # default FALSE
glb_interaction_only_feats <- NULL # : default or c(<parent_feat> = "<child_feat>")
# Near-zero-variance cut-offs, left at the caret defaults.
glb_nzv_freqCut <- 19 # 19 : caret default
glb_nzv_uniqueCut <- 10 # 10 : caret default
# Recursive feature elimination sizes per model family; empty here.
glbRFESizes <- list()
#glbRFESizes[["mdlFamily"]] <- c(4, 8, 16, 32, 64, 67, 68, 69) # Accuracy@69/70 = 0.8258
# Observation ids to treat as outliers, keyed by model family; empty here.
glbObsFitOutliers <- list()
# If outliers.n >= 10; consider concatenation of interaction vars
# glbObsFitOutliers[["<mdlFamily>"]] <- c(NULL
# is.na(.rstudent)
# is.na(.dffits)
# .hatvalues >= 0.99
# -38,167,642 < minmax(.rstudent) < 49,649,823
# , <comma-separated-<glb_id_var>>
# )
glbObsTrnOutliers <- list()
# influence.measures: car::outlier; rstudent; dffits; hatvalues; dfbeta; dfbetas
#mdlId <- "RFE.X.glm"; obs_df <- fitobs_df
#mdlId <- "Final.glm"; obs_df <- trnobs_df
#mdlId <- "CSM2.X.glm"; obs_df <- fitobs_df
#print(outliers <- car::outlierTest(glb_models_lst[[mdlId]]$finalModel))
#mdlIdFamily <- paste0(head(unlist(str_split(mdlId, "\\.")), -1), collapse="."); obs_df <- dplyr::filter_(obs_df, interp(~(!(var %in% glbObsFitOutliers[[mdlIdFamily]])), var = as.name(glb_id_var))); model_diags_df <- cbind(obs_df, data.frame(.rstudent=stats::rstudent(glb_models_lst[[mdlId]]$finalModel)), data.frame(.dffits=stats::dffits(glb_models_lst[[mdlId]]$finalModel)), data.frame(.hatvalues=stats::hatvalues(glb_models_lst[[mdlId]]$finalModel)));print(summary(model_diags_df[, c(".rstudent",".dffits",".hatvalues")])); table(cut(model_diags_df$.hatvalues, breaks=c(0.00, 0.98, 0.99, 1.00)))
#print(subset(model_diags_df, is.na(.rstudent))[, glb_id_var])
#print(subset(model_diags_df, is.na(.dffits))[, glb_id_var])
#print(model_diags_df[which.min(model_diags_df$.dffits), ])
#print(subset(model_diags_df, .hatvalues > 0.99)[, glb_id_var])
#dffits_df <- merge(dffits_df, outliers_df, by="row.names", all.x=TRUE); row.names(dffits_df) <- dffits_df$Row.names; dffits_df <- subset(dffits_df, select=-Row.names)
#dffits_df <- merge(dffits_df, glbObsFit, by="row.names", all.x=TRUE); row.names(dffits_df) <- dffits_df$Row.names; dffits_df <- subset(dffits_df, select=-Row.names)
#subset(dffits_df, !is.na(.Bonf.p))
#mdlId <- "CSM.X.glm"; vars <- myextract_actual_feats(row.names(orderBy(reformulate(c("-", paste0(mdlId, ".imp"))), myget_feats_imp(glb_models_lst[[mdlId]]))));
#model_diags_df <- glb_get_predictions(model_diags_df, mdlId, glb_rsp_var)
#obs_ix <- row.names(model_diags_df) %in% names(outliers$rstudent)[1]
#obs_ix <- which(is.na(model_diags_df$.rstudent))
#obs_ix <- which(is.na(model_diags_df$.dffits))
#myplot_parcoord(obs_df=model_diags_df[, c(glb_id_var, glbFeatsCategory, ".rstudent", ".dffits", ".hatvalues", glb_rsp_var, paste0(glb_rsp_var, mdlId), vars[1:min(20, length(vars))])], obs_ix=obs_ix, id_var=glb_id_var, category_var=glbFeatsCategory)
#model_diags_df[row.names(model_diags_df) %in% names(outliers$rstudent)[c(1:2)], ]
#ctgry_diags_df <- model_diags_df[model_diags_df[, glbFeatsCategory] %in% c("Unknown#0"), ]
#myplot_parcoord(obs_df=ctgry_diags_df[, c(glb_id_var, glbFeatsCategory, ".rstudent", ".dffits", ".hatvalues", glb_rsp_var, "startprice.log10.predict.RFE.X.glmnet", indep_vars[1:20])], obs_ix=row.names(ctgry_diags_df) %in% names(outliers$rstudent)[1], id_var=glb_id_var, category_var=glbFeatsCategory)
#table(glbObsFit[model_diags_df[, glbFeatsCategory] %in% c("iPad1#1"), "startprice.log10.cut.fctr"])
#glbObsFit[model_diags_df[, glbFeatsCategory] %in% c("iPad1#1"), c(glb_id_var, "startprice")]
# No outliers & .dffits == NaN
#myplot_parcoord(obs_df=model_diags_df[, c(glb_id_var, glbFeatsCategory, glb_rsp_var, "startprice.log10.predict.RFE.X.glmnet", indep_vars[1:10])], obs_ix=seq(1:nrow(model_diags_df))[is.na(model_diags_df$.dffits)], id_var=glb_id_var, category_var=glbFeatsCategory)
# Modify mdlId to (build & extract) "<FamilyId>#<Fit|Trn>#<caretMethod>#<preProc1.preProc2>#<samplingMethod>"
# Containers for fitted models and their summary table.
glb_models_lst <- list()
glb_models_df <- data.frame()
# Candidate caret methods, chosen by problem type: regression, binomial
# classification, or (otherwise) multi-class classification.
glbMdlMethods <-
    if (glb_is_regression) {
        # Regression
        c(
            # deterministic
            #, "lm", # same as glm
            "glm", "bayesglm", "glmnet",
            "rpart",
            # non-deterministic
            "gbm", "rf",
            # Unknown
            "nnet", "avNNet", # runs 25 models per cv sample for tunelength=5
            "svmLinear", "svmLinear2",
            "svmPoly", # runs 75 models per cv sample for tunelength=5
            "svmRadial",
            "earth",
            "bagEarth" # Takes a long time
        )
    } else if (glb_is_binomial) {
        # Classification - Add ada (auto feature selection)
        c(
            # deterministic
            "bagEarth", # Takes a long time
            "glm", "bayesglm", "glmnet",
            "nnet",
            "rpart",
            # non-deterministic
            "gbm",
            "avNNet", # runs 25 models per cv sample for tunelength=5
            "rf",
            # Unknown
            "lda", "lda2",
            # svm models crash when predict is called -> internal to kernlab it should call predict without .outcome
            "svmLinear", "svmLinear2",
            "svmPoly", # runs 75 models per cv sample for tunelength=5
            "svmRadial",
            "earth"
        )
    } else {
        c(
            # non-deterministic
            "rf",
            # Unknown
            "gbm", "rpart"
        )
    }
# Model families to fit (family name -> caret methods) and optional
# per-family feature lists.
glb_mdl_family_lst <- list(); glb_mdl_feats_lst <- list()
# family: Choose from c("RFE.X", "CSM.X", "All.X", "Best.Interact")
# methods: Choose from c(NULL, <method>, glbMdlMethods)
#glb_mdl_family_lst[["RFE.X"]] <- c("glmnet", "glm") # non-NULL list is mandatory
# Only the "All.X" family is active, fitted with glmnet.
glb_mdl_family_lst[["All.X"]] <- "glmnet" # non-NULL list is mandatory
#glb_mdl_family_lst[["Best.Interact"]] <- "glmnet" # non-NULL list is mandatory
# Check if interaction features make RFE better
# glb_mdl_family_lst[["CSM.X"]] <- setdiff(glbMdlMethods, c("lda", "lda2")) # crashing due to category:.clusterid ??? #c("glmnet", "glm") # non-NULL list is mandatory
# glb_mdl_feats_lst[["CSM.X"]] <- c(NULL
# , <comma-separated-features-vector>
# )
# dAFeats.CSM.X %<d-% c(NULL
# # Interaction feats up to varImp(RFE.X.glmnet) >= 50
# , <comma-separated-features-vector>
# , setdiff(myextract_actual_feats(predictors(rfe_fit_results)), c(NULL
# , <comma-separated-features-vector>
# ))
# )
# glb_mdl_feats_lst[["CSM.X"]] <- "%<d-% dAFeats.CSM.X"
# Check if tuning parameters make fit better; make it mdlFamily customizable ?
# Custom tuning grids; empty here (the per-method examples that follow are
# commented out).
glb_tune_models_df <- data.frame()
#avNNet
# size=[1] 3 5 7 9; decay=[0] 1e-04 0.001 0.01 0.1; bag=[FALSE]; RMSE=1.3300906
#bagEarth
# degree=1 [2] 3; nprune=64 128 256 512 [1024]; RMSE=0.6486663 (up)
# glb_tune_models_df <- myrbind_df(glb_tune_models_df, rbind(data.frame()
# ,data.frame(method = "bagEarth", parameter = "nprune", vals = "256")
# ,data.frame(method = "bagEarth", parameter = "degree", vals = "2")
# ))
#earth
# degree=[1]; nprune=2 [9] 17 25 33; RMSE=0.1334478
#gbm
# shrinkage=0.05 [0.10] 0.15 0.20 0.25; n.trees=100 150 200 [250] 300; interaction.depth=[1] 2 3 4 5; n.minobsinnode=[10]; RMSE=0.2008313
# glb_tune_models_df <- myrbind_df(glb_tune_models_df, rbind(data.frame()
# ,data.frame(method = "gbm", parameter = "shrinkage", min = 0.05, max = 0.25, by = 0.05)
# ,data.frame(method = "gbm", parameter = "n.trees", min = 100, max = 300, by = 50)
# ,data.frame(method = "gbm", parameter = "interaction.depth", min = 1, max = 5, by = 1)
# ,data.frame(method = "gbm", parameter = "n.minobsinnode", min = 10, max = 10, by = 10)
# #seq(from=0.05, to=0.25, by=0.05)
# ))
#glmnet
# alpha=0.100 [0.325] 0.550 0.775 1.000; lambda=0.0005232693 0.0024288010 0.0112734954 [0.0523269304] 0.2428800957; RMSE=0.6164891
# glb_tune_models_df <- myrbind_df(glb_tune_models_df, rbind(data.frame()
# ,data.frame(method = "glmnet", parameter = "alpha", vals = "0.550 0.775 0.8875 0.94375 1.000")
# ,data.frame(method = "glmnet", parameter = "lambda", vals = "9.858855e-05 0.0001971771 0.0009152152 0.0042480525 0.0197177130")
# ))
#nnet
# size=3 5 [7] 9 11; decay=0.0001 0.001 0.01 [0.1] 0.2; RMSE=0.9287422
# glb_tune_models_df <- myrbind_df(glb_tune_models_df, rbind(data.frame()
# ,data.frame(method = "nnet", parameter = "size", vals = "3 5 7 9 11")
# ,data.frame(method = "nnet", parameter = "decay", vals = "0.0001 0.0010 0.0100 0.1000 0.2000")
# ))
#rf # Don't bother; results are not deterministic
# mtry=2 35 68 [101] 134; RMSE=0.1339974
# glb_tune_models_df <- myrbind_df(glb_tune_models_df, rbind(data.frame()
# ,data.frame(method = "rf", parameter = "mtry", vals = "2 5 9 13 17")
# ))
#rpart
# cp=0.020 [0.025] 0.030 0.035 0.040; RMSE=0.1770237
# glb_tune_models_df <- myrbind_df(glb_tune_models_df, rbind(data.frame()
# ,data.frame(method = "rpart", parameter = "cp", vals = "0.004347826 0.008695652 0.017391304 0.021739130 0.034782609")
# ))
#svmLinear
# C=0.01 0.05 [0.10] 0.50 1.00 2.00 3.00 4.00; RMSE=0.1271318; 0.1296718
# glb_tune_models_df <- myrbind_df(glb_tune_models_df, rbind(data.frame()
# ,data.frame(method = "svmLinear", parameter = "C", vals = "0.01 0.05 0.1 0.5 1")
# ))
#svmLinear2
# cost=0.0625 0.1250 [0.25] 0.50 1.00; RMSE=0.1276354
# glb_tune_models_df <- myrbind_df(glb_tune_models_df, rbind(data.frame()
# ,data.frame(method = "svmLinear2", parameter = "cost", vals = "0.0625 0.125 0.25 0.5 1")
# ))
#svmPoly
# degree=[1] 2 3 4 5; scale=0.01 0.05 [0.1] 0.5 1; C=0.50 1.00 [2.00] 3.00 4.00; RMSE=0.1276130
# glb_tune_models_df <- myrbind_df(glb_tune_models_df, rbind(data.frame()
# ,data.frame(method="svmPoly", parameter="degree", min=1, max=5, by=1) #seq(1, 5, 1)
# ,data.frame(method="svmPoly", parameter="scale", vals="0.01, 0.05, 0.1, 0.5, 1")
# ,data.frame(method="svmPoly", parameter="C", vals="0.50, 1.00, 2.00, 3.00, 4.00")
# ))
#svmRadial
# sigma=[0.08674323]; C=0.25 0.50 1.00 [2.00] 4.00; RMSE=0.1614957
#glb2Sav(); all.equal(sav_models_df, glb_models_df)
# Pre-processing methods; none selected.
glb_preproc_methods <- NULL
# c("YeoJohnson", "center.scale", "range", "pca", "ica", "spatialSign")
# Baseline prediction model feature(s)
glb_Baseline_mdl_var <- NULL # or c("<feat>")
# Optional custom metric: a cost matrix, the metric's name, its direction and
# the summary function that computes it. All unset here.
glbMdlMetric_terms <- NULL # or matrix(c(
# 0,1,2,3,4,
# 2,0,1,2,3,
# 4,2,0,1,2,
# 6,4,2,0,1,
# 8,6,4,2,0
# ), byrow=TRUE, nrow=5)
glbMdlMetricSummary <- NULL # or "<metric_name>"
glbMdlMetricMaximize <- NULL # or FALSE (TRUE is not the default for both classification & regression)
glbMdlMetricSummaryFn <- NULL # or function(data, lev=NULL, model=NULL) {
# confusion_mtrx <- t(as.matrix(confusionMatrix(data$pred, data$obs)))
# #print(confusion_mtrx)
# #print(confusion_mtrx * glbMdlMetric_terms)
# metric <- sum(confusion_mtrx * glbMdlMetric_terms) / nrow(data)
# names(metric) <- glbMdlMetricSummary
# return(metric)
# }
# Repeated cross-validation: number of folds and number of repeats.
glb_rcv_n_folds <- 3 # or NULL
glb_rcv_n_repeats <- 3 # or NULL
glb_clf_proba_threshold <- NULL # 0.5
# Model selection criteria
# Metrics used to rank models, in order, chosen by problem type. The prefix of
# each name (min./max.) states the direction and the suffix (.OOB/.fit) the
# data set it is measured on.
if (glb_is_regression) {
    glbMdlMetricsEval <- c("min.RMSE.OOB", "max.R.sq.OOB", "max.Adj.R.sq.fit", "min.RMSE.fit")
    #glbMdlMetricsEval <- c("min.RMSE.fit", "max.R.sq.fit", "max.Adj.R.sq.fit")
}
if (glb_is_classification) {
    glbMdlMetricsEval <- if (glb_is_binomial) {
        c("max.Accuracy.OOB", "max.AUCROCR.OOB", "max.AUCpROC.OOB", "min.aic.fit", "max.Accuracy.fit")
    } else {
        c("max.Accuracy.OOB", "max.Kappa.OOB")
    }
}
# ---- Ensemble, model selection & output controls ----
# select from NULL [no ensemble models], "auto" [all models better than MFO or Baseline], c(mdl_ids in glb_models_lst) [Typically top-rated models in auto]
glb_mdl_ensemble <- NULL
# "%<d-% setdiff(mygetEnsembleAutoMdlIds(), 'CSM.X.rf')"
# c(<comma-separated-mdlIds>
# )
# Only for classifications; for regressions remove "(.*)\\.prob" from the regex
# tmp_fitobs_df <- glbObsFit[, grep(paste0("^", gsub(".", "\\.", mygetPredictIds$value, fixed = TRUE), "CSM\\.X\\.(.*)\\.prob"), names(glbObsFit), value = TRUE)]; cor_mtrx <- cor(tmp_fitobs_df); cor_vctr <- sort(cor_mtrx[row.names(orderBy(~-Overall, varImp(glb_models_lst[["Ensemble.repeatedcv.glmnet"]])$imp))[1], ]); summary(cor_vctr); cor_vctr
#ntv.glm <- glm(reformulate(indep_vars, glb_rsp_var), family = "binomial", data = glbObsFit)
#step.glm <- step(ntv.glm)
# Id of the selected model; the final-model id is left NULL here.
glb_sel_mdl_id <- "All.X##rcv#glmnet" #select from c(NULL, "All.X##rcv#glmnet", "RFE.X##rcv#glmnet", <mdlId>)
glb_fin_mdl_id <- NULL #select from c(NULL, glb_sel_mdl_id)
# Columns to display: id, category and response variables; add further
# critical columns inside the c(...) call below.
glb_dsp_cols <- c(glb_id_var, glbFeatsCategory, glb_rsp_var
# List critical cols excl. glb_id_var, glbFeatsCategory & glb_rsp_var
)
# Output specs
glbOutDataVizFname <- "NYTBlogs3_obsall.csv" # choose from c(NULL, "NYTBlogs3_obsall.csv")
glb_out_obs <- NULL # select from c(NULL : default to "new", "all", "new", "trn")
# Named list mapping output column name -> source expression/column.
glb_out_vars_lst <- list()
# glb_id_var will be the first output column, by default
# The value is stored as a string prefixed with "%<d-%".
# NOTE(review): presumably a delayed-evaluation marker resolved later by the
# framework, once glb_fin_mdl_id is known -- confirm.
glb_out_vars_lst[["Probability1"]] <-
"%<d-% mygetPredictIds(glb_rsp_var, glb_fin_mdl_id)$prob"
# glb_out_vars_lst[[glb_rsp_var_raw]] <- glb_rsp_var_raw
# glb_out_vars_lst[[paste0(head(unlist(strsplit(mygetPredictIds$value, "")), -1), collapse = "")]] <-
glbOutStackFnames <- NULL #: default
# c("ebayipads_txt_assoc1_out_bid1_stack.csv") # manual stack
# c("ebayipads_finmdl_bid1_out_nnet_1.csv") # universal stack
# Prefix for output file names (e.g. pasted in front of "FeatsTxtClusters.png" in the clustering chunk).
glb_out_pfx <- "NYTBlogs3_feat_PubDate_"
glb_save_envir <- FALSE # or TRUE
# Depict process: petri net of the analytics workflow.
# Transitions are the data/model artefacts, places are the workflow stages and
# each arc is written below as one (begin, end) pair.
glb_analytics_pn <- local({
    # Transitions with their plot coordinates
    pn_trans_df <- data.frame(
        id = 1:6,
        name = c("data.training.all", "data.new",
                 "model.selected", "model.final",
                 "data.training.all.prediction", "data.new.prediction"),
        x = c( -5, -5, -15, -25, -25, -35),
        y = c( -5,  5,   0,   0,  -5,   5)
    )

    # Places with their plot coordinates; M0 = initial marking (3 tokens in "bgn")
    pn_places_df <- data.frame(
        id = 1:4,
        name = c("bgn", "fit.data.training.all", "predict.data.new", "end"),
        x = c( -0, -20, -30, -40),
        y = c(  0,   0,   0,   0),
        M0 = c( 3,   0,   0,   0)
    )

    # One row per arc: column 1 = begin node, column 2 = end node
    pn_arcs_mtrx <- matrix(c(
        "bgn",                          "data.training.all",
        "bgn",                          "data.new",
        "bgn",                          "model.selected",
        "data.training.all",            "fit.data.training.all",
        "model.selected",               "fit.data.training.all",
        "fit.data.training.all",        "model.final",
        "fit.data.training.all",        "data.training.all.prediction",
        "model.final",                  "predict.data.new",
        "data.new",                     "predict.data.new",
        "predict.data.new",             "data.new.prediction",
        "data.training.all.prediction", "end",
        "data.new.prediction",          "end"
    ), ncol = 2, byrow = TRUE)

    petrinet(name = "glb_analytics_pn",
             trans_df = pn_trans_df,
             places_df = pn_places_df,
             arcs_df = data.frame(begin = pn_arcs_mtrx[, 1],
                                  end = pn_arcs_mtrx[, 2]))
})
# Render the net with flipped axes (un-flipped variant kept for reference)
#print(ggplot.petrinet(glb_analytics_pn))
print(ggplot.petrinet(glb_analytics_pn) + coord_flip())
## Loading required package: grid
# Start with no analytics objects recorded.
# NOTE(review): presumably tracks which petri-net artefacts are available as the run progresses -- confirm in later chunks.
glb_analytics_avl_objs <- NULL
# Begin the chunk log with the first step, "import.data"; the log printed next
# shows a single row with bgn set and end / elapsed still NA.
glb_chunks_df <- myadd_chunk(NULL, "import.data")
## label step_major step_minor label_minor bgn end elapsed
## 1 import.data 1 0 0 5.077 NA NA
1.0: import data## [1] "Reading file ./data/NYTimesBlogTrain.csv..."
## [1] "dimensions of data in ./data/NYTimesBlogTrain.csv: 6,532 rows x 10 cols"
## NewsDesk SectionName SubsectionName
## 1 Business Crosswords/Games
## 2 Culture Arts
## 3 Business Business Day Dealbook
## 4 Business Business Day Dealbook
## 5 Science Health
## 6 Science Health
## Headline
## 1 More School Daze
## 2 New 96-Page Murakami Work Coming in December
## 3 Public Pension Funds Stay Mum on Corporate Expats
## 4 Boot Camp for Bankers
## 5 Of Little Help to Older Knees
## 6 A Benefit of Legal Marijuana
## Snippet
## 1 A puzzle from Ethan Cooper that reminds me that a bill is due.
## 2 The Strange Library will arrive just three and a half months after Mr. Murakamis latest novel, Colorless Tsukuru Tazaki and His Years of Pilgrimage.
## 3 Public pension funds have major stakes in American companies moving overseas to cut their tax bills. But they are saying little about the strategy, which could hurt the nations tax base.
## 4 As they struggle to find new business to bolster sluggish earnings, banks consider the nations 25 million veterans and service members ideal customers.
## 5 Middle-aged and older patients are unlikely to benefit in the long term from surgery to repair tears in the meniscus, pads of cartilage in the knee, a new review of studies has found.
## 6 A new study has found evidence that legal access to marijuana is associated with fewer opioid overdose deaths, but researchers said their findings should not be used as the basis for the wide adoption of legalized cannabis.
## Abstract
## 1 A puzzle from Ethan Cooper that reminds me that a bill is due.
## 2 The Strange Library will arrive just three and a half months after Mr. Murakamis latest novel, Colorless Tsukuru Tazaki and His Years of Pilgrimage.
## 3 Public pension funds have major stakes in American companies moving overseas to cut their tax bills. But they are saying little about the strategy, which could hurt the nations tax base.
## 4 As they struggle to find new business to bolster sluggish earnings, banks consider the nations 25 million veterans and service members ideal customers.
## 5 Middle-aged and older patients are unlikely to benefit in the long term from surgery to repair tears in the meniscus, pads of cartilage in the knee, a new review of studies has found.
## 6 A new study has found evidence that legal access to marijuana is associated with fewer opioid overdose deaths, but researchers said their findings should not be used as the basis for the wide adoption of legalized cannabis.
## WordCount PubDate Popular UniqueID
## 1 508 2014-09-01 22:00:09 1 1
## 2 285 2014-09-01 21:14:07 0 2
## 3 1211 2014-09-01 21:05:36 0 3
## 4 1405 2014-09-01 20:43:34 1 4
## 5 181 2014-09-01 18:58:51 1 5
## 6 245 2014-09-01 18:52:22 1 6
## NewsDesk SectionName SubsectionName
## 226 Styles
## 995
## 3327
## 4753 Multimedia
## 4802 Business Crosswords/Games
## 6463 TStyle
## Headline
## 226 For Tavi Gevinson, Fashion Takes a Back Seat, for Now
## 995 Reconsidering What to Call an Extremist Group
## 3327 Clinton's Diagnosis of What's Wrong With Politics
## 4753 'Off Color' and on Target About Race in America
## 4802 Daniel Finkel's Circle-Toss Game
## 6463 Entering the Void
## Snippet
## 226 Tavi Gevinson, the teenage fashion star turned Broadway actress, wont be much of a player at New York Fashion Week this season.
## 995 Editors have decided to adjust how The Times refer to an Islamic extremist group that controls territory in Syria and Iraq.
## 3327 Hillary Rodham Clinton continued to laugh off questions about her presidential aspirations on Tuesday, but she did shed some light on what she thinks is wrong in Washington.
## 4753 Off Color, a New York Times video series, looks at how artists of color are making sharp social commentary about race in America through comedy and performance.
## 4802 By math educator Daniel Finkel, a puzzle thats childs play. Can you figure it out?
## 6463 The Spanish artist Miquel Barcel closely examines the basic materials of life in response to Edward Hirsch questioning his own belief in a higher power.
## Abstract
## 226 Tavi Gevinson, the teenage fashion star turned Broadway actress, wont be much of a player at New York Fashion Week this season.
## 995 Editors have decided to adjust how The Times refer to an Islamic extremist group that controls territory in Syria and Iraq.
## 3327 Hillary Rodham Clinton continued to laugh off questions about her presidential aspirations on Tuesday, but she did shed some light on what she thinks is wrong in Washington.
## 4753 Off Color, a New York Times video series, looks at how artists of color are making sharp social commentary about race in America through comedy and performance.
## 4802 By math educator Daniel Finkel, a puzzle thats childs play. Can you figure it out?
## 6463 The Spanish artist Miquel Barcel closely examines the basic materials of life in response to Edward Hirsch questioning his own belief in a higher power.
## WordCount PubDate Popular UniqueID
## 226 459 2014-09-04 16:55:57 0 226
## 995 301 2014-09-15 16:05:13 0 995
## 3327 236 2014-10-14 14:45:51 0 3327
## 4753 393 2014-11-02 05:00:13 0 4753
## 4802 1628 2014-11-03 12:00:04 1 4802
## 6463 264 2014-11-27 12:00:09 0 6463
## NewsDesk SectionName SubsectionName
## 6527 Foreign
## 6528 Opinion Room For Debate
## 6529 Foreign
## 6530 TStyle
## 6531 Multimedia
## 6532 Business
## Headline
## 6527 1914: Russians Dominate in East Poland
## 6528 Finding a Secretary of Defense
## 6529 1889: Metropolitan Opera House Reopens in New York
## 6530 The Daily Gift: Picasso Plates for Creative Dining
## 6531 Racing From New York to Barcelona
## 6532 Math Anxiety: Why Hollywood Makes Robots of Alan Turing and Other Geniuses
## Snippet
## 6527 From the International Herald Tribune archives: Russians dominate in East Poland in 1914.
## 6528 If Chuck Hagel isn't the right Pentagon chief to respond to an onslaught of global crises, who is?
## 6529 From the International Herald Tribune archives: The Metropolitan Opera House reopens in New York in 1889.
## 6530 Each day until Christmas, the editors of T share a new holiday gift idea.
## 6531 A sailboat race from New York to Barcelona was the setting for a thrilling and sometimes terrifying video about this challenging sport.
## 6532 The visionary who stares at formulas written on walls or mirrors or better yet, thin air has become a Hollywood trope. So has the depiction of the genius who cant connect with real people.
## Abstract
## 6527 From the International Herald Tribune archives: Russians dominate in East Poland in 1914.
## 6528 If Chuck Hagel isn't the right Pentagon chief to respond to an onslaught of global crises, who is?
## 6529 From the International Herald Tribune archives: The Metropolitan Opera House reopens in New York in 1889.
## 6530 Each day until Christmas, the editors of T share a new holiday gift idea.
## 6531 A sailboat race from New York to Barcelona was the setting for a thrilling and sometimes terrifying video about this challenging sport.
## 6532 The visionary who stares at formulas written on walls or mirrors or better yet, thin air has become a Hollywood trope. So has the depiction of the genius who cant connect with real people.
## WordCount PubDate Popular UniqueID
## 6527 176 2014-11-30 13:48:40 0 6527
## 6528 1597 2014-11-30 13:27:23 0 6528
## 6529 214 2014-11-30 09:44:57 0 6529
## 6530 61 2014-11-30 09:00:43 0 6530
## 6531 441 2014-11-30 09:00:22 0 6531
## 6532 921 2014-11-30 07:00:40 0 6532
## 'data.frame': 6532 obs. of 10 variables:
## $ NewsDesk : chr "Business" "Culture" "Business" "Business" ...
## $ SectionName : chr "Crosswords/Games" "Arts" "Business Day" "Business Day" ...
## $ SubsectionName: chr "" "" "Dealbook" "Dealbook" ...
## $ Headline : chr "More School Daze" "New 96-Page Murakami Work Coming in December" "Public Pension Funds Stay Mum on Corporate Expats" "Boot Camp for Bankers" ...
## $ Snippet : chr "A puzzle from Ethan Cooper that reminds me that a bill is due." "The Strange Library will arrive just three and a half months after Mr. Murakamis latest novel, Colorless Tsukuru Tazaki and His"| __truncated__ "Public pension funds have major stakes in American companies moving overseas to cut their tax bills. But they are saying little"| __truncated__ "As they struggle to find new business to bolster sluggish earnings, banks consider the nations 25 million veterans and service "| __truncated__ ...
## $ Abstract : chr "A puzzle from Ethan Cooper that reminds me that a bill is due." "The Strange Library will arrive just three and a half months after Mr. Murakamis latest novel, Colorless Tsukuru Tazaki and His"| __truncated__ "Public pension funds have major stakes in American companies moving overseas to cut their tax bills. But they are saying little"| __truncated__ "As they struggle to find new business to bolster sluggish earnings, banks consider the nations 25 million veterans and service "| __truncated__ ...
## $ WordCount : int 508 285 1211 1405 181 245 258 893 1077 188 ...
## $ PubDate : chr "2014-09-01 22:00:09" "2014-09-01 21:14:07" "2014-09-01 21:05:36" "2014-09-01 20:43:34" ...
## $ Popular : int 1 0 0 1 1 1 0 1 1 0 ...
## $ UniqueID : int 1 2 3 4 5 6 7 8 9 10 ...
## - attr(*, "comment")= chr "glbObsTrn"
## NULL
## [1] "Reading file ./data/NYTimesBlogTest.csv..."
## [1] "dimensions of data in ./data/NYTimesBlogTest.csv: 1,870 rows x 9 cols"
## NewsDesk SectionName SubsectionName
## 1 Culture
## 2 Culture Arts
## 3 Business Crosswords/Games
## 4 Business Business Day Dealbook
## 5 Science Health
## 6 Science Health
## Headline
## 1 'Birdman' Tops the Gothams
## 2 'Sleepy Hollow' Recap: A Not-So-Shocking Death
## 3 Drinking Buddy For Falstaff
## 4 Encouraging Public Service, Through Wall Street's 'Revolving Door'
## 5 Therapy Prevents Repeat Suicide Attempts
## 6 Hoping for a Good Death
## Snippet
## 1 The backstage tale won two awards; Citizenfour, the Edward Snowden documentary, was also a winner.
## 2 In the fall season finale, a question of where the series has many places to go.
## 3 In which Timothy Polin reveals his potty mouth.
## 4 The debate about pay for Wall Street executives who take government jobs appears to be based more on a populist shakedown than on good public policy.
## 5 Short-term psychotherapy may be an effective way to prevent repeated suicide attempts.
## 6 What I hadnt considered before my fathers heart attack was the precise meaning of not wanting to live hooked up to machines.
## Abstract
## 1 The backstage tale won two awards; Citizenfour, the Edward Snowden documentary, was also a winner.
## 2 In the fall season finale, a question of where the series has many places to go.
## 3 In which Timothy Polin reveals his potty mouth.
## 4 The debate about pay for Wall Street executives who take government jobs appears to be based more on a populist shakedown than on good public policy.
## 5 Short-term psychotherapy may be an effective way to prevent repeated suicide attempts.
## 6 What I hadnt considered before my fathers heart attack was the precise meaning of not wanting to live hooked up to machines.
## WordCount PubDate UniqueID
## 1 111 2014-12-01 22:45:24 6533
## 2 558 2014-12-01 22:01:34 6534
## 3 788 2014-12-01 22:00:26 6535
## 4 915 2014-12-01 21:04:13 6536
## 5 213 2014-12-01 19:13:20 6537
## 6 938 2014-12-01 19:05:12 6538
## NewsDesk SectionName SubsectionName
## 3 Business Crosswords/Games
## 334 OpEd Opinion
## 725 TStyle
## 732 Business Business Day Dealbook
## 752 Business Business Day Dealbook
## 864
## Headline
## 3 Drinking Buddy For Falstaff
## 334 Facts & Figures: America’s Unique Take on Maternity Leave
## 725 Ansel Elgort Buttons Up in Brioni
## 732 A Shake-Up as the Financial World Infiltrates Philanthropy
## 752 Coupang, a South Korean E-Commerce Site, Raises $300 Million
## 864 Today in Politics
## Snippet
## 3 In which Timothy Polin reveals his potty mouth.
## 334 In the U.S., paid parental leave is more of a perk than a guarantee.
## 725 The actor brought a tinge of youthfulness to the classic Italian houses retro-tailored look.
## 732 Donor-advised funds help investors get deductions for charitable donations in one year, but society doesnt get the benefit of the money right away.
## 752 The latest financing round underscores Coupangs maturity and its ambitions to one day be a publicly traded company.
## 864 The 113th Congress is concluding with partisan brinksmanship and one last mad scramble for votes to pass a $1.1 trillion spending package.
## Abstract
## 3 In which Timothy Polin reveals his potty mouth.
## 334 In the U.S., paid parental leave is more of a perk than a guarantee.
## 725 The actor brought a tinge of youthfulness to the classic Italian houses retro-tailored look.
## 732 Donor-advised funds help investors get deductions for charitable donations in one year, but society doesnt get the benefit of the money right away.
## 752 The latest financing round underscores Coupangs maturity and its ambitions to one day be a publicly traded company.
## 864 The 113th Congress is concluding with partisan brinksmanship and one last mad scramble for votes to pass a $1.1 trillion spending package.
## WordCount PubDate UniqueID
## 3 788 2014-12-01 22:00:26 6535
## 334 160 2014-12-04 11:45:20 6866
## 725 89 2014-12-10 12:30:47 7257
## 732 1172 2014-12-10 12:00:38 7264
## 752 353 2014-12-10 08:30:41 7284
## 864 1544 2014-12-11 07:09:25 7396
## NewsDesk SectionName SubsectionName
## 1865
## 1866 Business Technology
## 1867 Metro N.Y. / Region
## 1868 Multimedia
## 1869 Foreign World Asia Pacific
## 1870 Science Health
## Headline
## 1865 Today in Politics
## 1866 Uber Suspends Operations in Spain
## 1867 New York Today: The Year in News
## 1868 New Year, Old Memories, in Times Square
## 1869 Hong Kong Police Criticized After 14-Year-Old's Detention
## 1870 The Super-Short Workout and Other Fitness Trends
## Snippet
## 1865 House Republicans are ending the year on a defensive note over Representative Steve Scalises 2002 speech to a white supremacist group.
## 1866 In a first in the growing pushback against Ubers global expansion, a judges ruling barred telecommunications operators and banks from supporting the companys services.
## 1867 Wednesday: The most read stories of 2014, teeth-chattering cold, and its New Years Eve.
## 1868 What happens when you combine Burning Man, Independence Day fireworks, the last day of school and a full-contact Black Friday sale-a-bration? New Years Eve in Times Square.
## 1869 The authorities have been accused of trying to intimidate young pro-democracy protesters and their families after a 14-year-old girl was detained on suspicion of drawing flowers in chalk near government headquarters and sent to a juvenile home.
## 1870 The big story in exercise science this year was the super-short workout, although many other fitness-related themes emerged in 2014.
## Abstract
## 1865 House Republicans are ending the year on a defensive note over Representative Steve Scalises 2002 speech to a white supremacist group.
## 1866 In a first in the growing pushback against Ubers global expansion, a judges ruling barred telecommunications operators and banks from supporting the companys services.
## 1867 Wednesday: The most read stories of 2014, teeth-chattering cold, and its New Years Eve.
## 1868 What happens when you combine Burning Man, Independence Day fireworks, the last day of school and a full-contact Black Friday sale-a-bration? New Years Eve in Times Square.
## 1869 The authorities have been accused of trying to intimidate young pro-democracy protesters and their families after a 14-year-old girl was detained on suspicion of drawing flowers in chalk near government headquarters and sent to a juvenile home.
## 1870 The big story in exercise science this year was the super-short workout, although many other fitness-related themes emerged in 2014.
## WordCount PubDate UniqueID
## 1865 1616 2014-12-31 07:03:46 8397
## 1866 292 2014-12-31 06:09:32 8398
## 1867 1010 2014-12-31 06:06:58 8399
## 1868 387 2014-12-31 05:00:19 8400
## 1869 717 2014-12-31 04:16:29 8401
## 1870 818 2014-12-31 00:01:10 8402
## 'data.frame': 1870 obs. of 9 variables:
## $ NewsDesk : chr "Culture" "Culture" "Business" "Business" ...
## $ SectionName : chr "" "Arts" "Crosswords/Games" "Business Day" ...
## $ SubsectionName: chr "" "" "" "Dealbook" ...
## $ Headline : chr "'Birdman' Tops the Gothams" "'Sleepy Hollow' Recap: A Not-So-Shocking Death" "Drinking Buddy For Falstaff" "Encouraging Public Service, Through Wall Street's 'Revolving Door'" ...
## $ Snippet : chr "The backstage tale won two awards; Citizenfour, the Edward Snowden documentary, was also a winner." "In the fall season finale, a question of where the series has many places to go." "In which Timothy Polin reveals his potty mouth." "The debate about pay for Wall Street executives who take government jobs appears to be based more on a populist shakedown than "| __truncated__ ...
## $ Abstract : chr "The backstage tale won two awards; Citizenfour, the Edward Snowden documentary, was also a winner." "In the fall season finale, a question of where the series has many places to go." "In which Timothy Polin reveals his potty mouth." "The debate about pay for Wall Street executives who take government jobs appears to be based more on a populist shakedown than "| __truncated__ ...
## $ WordCount : int 111 558 788 915 213 938 1336 2644 752 99 ...
## $ PubDate : chr "2014-12-01 22:45:24" "2014-12-01 22:01:34" "2014-12-01 22:00:26" "2014-12-01 21:04:13" ...
## $ UniqueID : int 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 ...
## - attr(*, "comment")= chr "glbObsNew"
## NULL
## [1] "Partition stats:"
## Loading required package: sqldf
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
## Loading required package: tcltk
## Popular .src .n
## 1 0 Train 5439
## 2 NA Test 1870
## 3 1 Train 1093
## Popular .src .n
## 1 0 Train 5439
## 2 NA Test 1870
## 3 1 Train 1093
## .src .n
## 1 Train 6532
## 2 Test 1870
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Loading required package: lazyeval
## Loading required package: gdata
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
##
## The following objects are masked from 'package:dplyr':
##
## combine, first, last
##
## The following object is masked from 'package:stats':
##
## nobs
##
## The following object is masked from 'package:utils':
##
## object.size
## [1] "Found 0 duplicates by all features:"
## NULL
## [1] "Partition stats:"
## Popular .src .n
## 1 0 Train 5439
## 2 NA Test 1870
## 3 1 Train 1093
## Popular .src .n
## 1 0 Train 5439
## 2 NA Test 1870
## 3 1 Train 1093
## .src .n
## 1 Train 6532
## 2 Test 1870
## label step_major step_minor label_minor bgn end elapsed
## 1 import.data 1 0 0 5.077 9.058 3.981
## 2 inspect.data 2 0 0 9.058 NA NA
2.0: inspect data## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1870 rows containing non-finite values (stat_bin).
## Loading required package: reshape2
## Popular.0 Popular.1 Popular.NA
## Test NA NA 1870
## Train 5439 1093 NA
## Popular.0 Popular.1 Popular.NA
## Test NA NA 1
## Train 0.8326699 0.1673301 NA
## [1] "numeric data missing in glbObsAll: "
## Popular
## 1870
## [1] "numeric data w/ 0s in glbObsAll: "
## WordCount Popular
## 109 5439
## [1] "numeric data w/ Infs in glbObsAll: "
## named integer(0)
## [1] "numeric data w/ NaNs in glbObsAll: "
## named integer(0)
## [1] "string data missing in glbObsAll: "
## NewsDesk SectionName SubsectionName Headline Snippet
## 2408 2899 6176 0 13
## Abstract PubDate
## 17 0
## Popular Popular.fctr .n
## 1 0 N 5439
## 2 NA <NA> 1870
## 3 1 Y 1093
## Warning: Removed 1 rows containing missing values (position_stack).
## Popular.fctr.N Popular.fctr.Y Popular.fctr.NA
## Test NA NA 1870
## Train 5439 1093 NA
## Popular.fctr.N Popular.fctr.Y Popular.fctr.NA
## Test NA NA 1
## Train 0.8326699 0.1673301 NA
## Loading required package: plyr
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## label step_major step_minor label_minor bgn end elapsed
## 2 inspect.data 2 0 0 9.058 11.941 2.883
## 3 scrub.data 2 1 1 11.941 NA NA
2.1: scrub data## [1] "numeric data missing in glbObsAll: "
## Popular Popular.fctr
## 1870 1870
## [1] "numeric data w/ 0s in glbObsAll: "
## WordCount Popular
## 109 5439
## [1] "numeric data w/ Infs in glbObsAll: "
## named integer(0)
## [1] "numeric data w/ NaNs in glbObsAll: "
## named integer(0)
## [1] "string data missing in glbObsAll: "
## NewsDesk SectionName SubsectionName Headline Snippet
## 2408 2899 6176 0 13
## Abstract PubDate
## 17 0
## label step_major step_minor label_minor bgn end elapsed
## 3 scrub.data 2 1 1 11.941 12.708 0.767
## 4 transform.data 2 2 2 12.709 NA NA
2.2: transform data## [1] "Creating new feature: NDSSName.my..."
## [1] "Creating new feature: WordCount.log1p..."
## [1] "Creating new feature: WordCount.root2..."
## [1] "Creating new feature: WordCount.nexp..."
## label step_major step_minor label_minor bgn end elapsed
## 4 transform.data 2 2 2 12.709 13.016 0.307
## 5 extract.features 3 0 0 13.016 NA NA
3.0: extract features## label step_major step_minor label_minor bgn end elapsed
## 1 extract.features_bgn 1 0 0 13.07 NA NA
## label step_major step_minor label_minor
## 1 extract.features_bgn 1 0 0
## 2 extract.features_factorize.str.vars 2 0 0
## bgn end elapsed
## 1 13.07 13.08 0.01
## 2 13.08 NA NA
## NewsDesk SectionName SubsectionName Headline
## "NewsDesk" "SectionName" "SubsectionName" "Headline"
## Snippet Abstract PubDate .src
## "Snippet" "Abstract" "PubDate" ".src"
## NDSSName.my
## "NDSSName.my"
## Warning: Creating factors of string variable: NDSSName.my: # of unique
## values: 21
## label step_major step_minor label_minor
## 2 extract.features_factorize.str.vars 2 0 0
## 3 extract.features_xtract.DateTime.vars 3 0 0
## bgn end elapsed
## 2 13.080 13.097 0.017
## 3 13.097 NA NA
## [1] "Extracting features from DateTime(s): PubDate"
## Loading required package: XML
## [1] "**********"
## [1] "Consider adding state & city holidays for glbFeatsDateTime: PubDate"
## [1] "**********"
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## [1] "Missing data for numerics:"
## PubDate.last32.log1p.ctg
## 30
## label step_major step_minor label_minor
## 3 extract.features_xtract.DateTime.vars 3 0 0
## 4 extract.features_end 4 0 0
## bgn end elapsed
## 3 13.097 18.274 5.177
## 4 18.274 NA NA
## label step_major step_minor label_minor
## 3 extract.features_xtract.DateTime.vars 3 0 0
## 2 extract.features_factorize.str.vars 2 0 0
## 1 extract.features_bgn 1 0 0
## bgn end elapsed duration
## 3 13.097 18.274 5.177 5.177
## 2 13.080 13.097 0.017 0.017
## 1 13.070 13.080 0.010 0.010
## [1] "Total Elapsed Time: 18.274 secs"
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
## label step_major step_minor label_minor bgn end
## 5 extract.features 3 0 0 13.016 19.534
## 6 manage.missing.data 3 1 1 19.535 NA
## elapsed
## 5 6.518
## 6 NA
3.1: manage missing data## [1] "numeric data missing in : "
## Popular Popular.fctr
## 1870 1870
## [1] "numeric data w/ 0s in : "
## WordCount Popular WordCount.log1p
## 109 5439 109
## WordCount.root2 WordCount.nexp PubDate.wkday.fctr
## 109 2044 378
## PubDate.wkend PubDate.hlday PubDate.day.minutes
## 7787 8160 5
## PubDate.last2.log1p PubDate.last4.log1p PubDate.last8.log1p
## 2 4 8
## PubDate.last16.log1p PubDate.last32.log1p PubDate.last2.log1p.ctg
## 16 32 42
## PubDate.last4.log1p.ctg PubDate.last8.log1p.ctg PubDate.last16.log1p.ctg
## 84 168 336
## PubDate.last32.log1p.ctg
## 670
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## NewsDesk SectionName SubsectionName Headline Snippet
## 2408 2899 6176 0 13
## Abstract PubDate NDSSName.my
## 17 0 0
## [1] "numeric data missing in : "
## Popular Popular.fctr
## 1870 1870
## [1] "numeric data w/ 0s in : "
## WordCount Popular WordCount.log1p
## 109 5439 109
## WordCount.root2 WordCount.nexp PubDate.wkday.fctr
## 109 2044 378
## PubDate.wkend PubDate.hlday PubDate.day.minutes
## 7787 8160 5
## PubDate.last2.log1p PubDate.last4.log1p PubDate.last8.log1p
## 2 4 8
## PubDate.last16.log1p PubDate.last32.log1p PubDate.last2.log1p.ctg
## 16 32 42
## PubDate.last4.log1p.ctg PubDate.last8.log1p.ctg PubDate.last16.log1p.ctg
## 84 168 336
## PubDate.last32.log1p.ctg
## 670
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## NewsDesk SectionName SubsectionName Headline Snippet
## 2408 2899 6176 0 13
## Abstract PubDate NDSSName.my
## 17 0 0
## label step_major step_minor label_minor bgn end
## 6 manage.missing.data 3 1 1 19.535 20.595
## 7 cluster.data 3 2 2 20.595 NA
## elapsed
## 6 1.06
## 7 NA
# 3.2: cluster data
# mycompute_entropy_df: entropy of `entropy_var` within each group x cluster.
#
# Args:
#   obs_df:      data.frame of observations.
#   entropy_var: name (string) of the column whose value distribution is measured;
#                rows where it is NA are excluded.
#   by_var:      name (string) of the grouping column; NULL puts all rows into a
#                single ".default" group.
#
# Returns:
#   A data.frame with one row per observed (by_var, .clusterid) combination:
#     - a key column named paste0(by_var, ".clusterid") uniting the two ids,
#     - one count column per distinct non-NA value of entropy_var (0-filled),
#     - .entropy: maximum-likelihood entropy of those counts,
#     - .knt:     total number of counted observations in the row.
#   If obs_df has no ".clusterid" column, every row is assigned to cluster 1.
mycompute_entropy_df <- function(obs_df, entropy_var, by_var=NULL) {
    require(lazyeval)
    require(dplyr)
    require(tidyr)

    if (is.null(by_var)) {
        by_var <- ".default"
        obs_df$.default <- as.factor(".default")
    }

    # Exact name match: a substring test would let a column such as
    #   "foo.clusterid" suppress creation of the ".clusterid" column that
    #   count_() needs below
    if (!(".clusterid" %in% names(obs_df)))
        obs_df$.clusterid <- 1

    cluster_df <- obs_df %>%
        count_(c(by_var, ".clusterid", entropy_var)) %>%
        dplyr::filter(n > 0) %>%
        dplyr::filter_(interp(~(!is.na(var)), var=as.name(entropy_var))) %>%
        unite_(paste0(by_var, ".clusterid"),
               c(interp(by_var), ".clusterid")) %>%
        spread_(interp(entropy_var), "n", fill=0)
    # head(cluster_df)
    # sum(cluster_df$n)

    # Per-row class counts (everything except the first / key column); extracted
    #   before .entropy & .knt are appended so they never count themselves.
    # seq_len() keeps a zero-row result from being indexed as rows 1 and 0.
    knts_lst <- lapply(seq_len(nrow(cluster_df)),
                       function(row) as.numeric(cluster_df[row, -1]))

    # entropy:: is qualified because this function does not attach that package
    cluster_df$.entropy <- vapply(knts_lst,
                        function(knts) entropy::entropy(knts, method = "ML"),
                                  numeric(1))
    cluster_df$.knt <- vapply(knts_lst, sum, numeric(1))
    #print(cluster_df)
    return(cluster_df)
}
if (glb_cluster) {
require(proxy)
#require(hash)
require(dynamicTreeCut)
require(entropy)
require(tidyr)
require(ggdendro)
# mywgtdcosine_dist: pairwise weighted cosine distance between the rows of x.
#
#   d(i, k) = 1 - sum_j(w_j * x_ij * x_kj) /
#                 sqrt(sum_j(w_j * x_ij^2) * sum_j(w_j * x_kj^2))
#
# Args:
#   x:       numeric matrix (or object coercible via as.matrix); rows are
#            observations, columns are features.
#   y:       accepted but not used; distances are always computed among the
#            rows of x.
#   weights: non-negative feature weights, one per column of x; NULL weighs
#            all columns equally (plain cosine distance).
#
# Returns:
#   nrow(x) x nrow(x) matrix of distances with a zero diagonal. Pairs whose
#   similarity is undefined (e.g. a row with zero weighted norm) get distance 1.
#
# The same weights are applied to the dot product and to the norms. Previously
#   the dot product was effectively weighted by w^2 / sum(w) and the norms by
#   w / sum(w), so non-uniform weights did not yield a cosine: the result
#   depended on the scale of the weights and could become negative.
mywgtdcosine_dist <- function(x, y=NULL, weights=NULL) {
    if (!inherits(x, "matrix"))
        x <- as.matrix(x)
    if (is.null(weights))
        weights <- rep(1, ncol(x))
    if (length(weights) != ncol(x))
        stop("mywgtdcosine_dist: length(weights) must equal ncol(x)")

    # Normalised weights; the scale cancels between numerator & denominator
    wgts <- weights / sum(weights)
    wgtdx <- sweep(x, 2, wgts, "*")

    # Weighted inner products between all row pairs & weighted squared norms
    wgtd_dot <- tcrossprod(wgtdx, x)
    wgtd_sqnorm <- rowSums(wgtdx * x)

    ret_mtrx <- 1 - wgtd_dot / sqrt(outer(wgtd_sqnorm, wgtd_sqnorm))
    # 0 / 0 similarities are treated as maximally dissimilar
    ret_mtrx[is.nan(ret_mtrx)] <- 1
    diag(ret_mtrx) <- 0
    return(ret_mtrx)
}
# Make the custom metric available as proxy::dist(..., method = "mywgtdcosine").
# Registration is skipped when the entry already exists in pr_DB. Useful while debugging:
#   pr_DB$delete_entry("mywgtdcosine"); pr_DB$get_entry("mywgtdcosine")
if (!pr_DB$entry_exists("mywgtdcosine")) {
    pr_DB$set_entry(FUN = mywgtdcosine_dist, names = c("mywgtdcosine"))
    # NOTE(review): loop = FALSE presumably tells proxy to call FUN once on the whole
    #   matrix instead of once per pair of rows - confirm against the proxy docs
    pr_DB$modify_entry(names = "mywgtdcosine", type = "metric", loop = FALSE)
}

# Earlier experiment, kept for reference:
# glb_hash <- hash(key=unique(glbObsAll$myCategory),
#                  values=1:length(unique(glbObsAll$myCategory)))
# glb_hash_lst <- hash(key=unique(glbObsAll$myCategory),
#                      values=1:length(unique(glbObsAll$myCategory)))
#stop(here"); glb2Sav(); glbObsAll <- savObsAll

# Clustering features: columns named <initial>.P.* or <initial>.T.*, where <initial> is
#   the upper-cased first letter of any entry in glbFeatsText
cluster_vars <- grep(pattern = paste0("[",
                                      toupper(paste0(substr(glbFeatsText, 1, 1),
                                                     collapse = "")),
                                      "]\\.[PT]\\."),
                     x = names(glbObsAll),
                     value = TRUE)

# Weight of each clustering feature = |correlation with the response| on the Train rows
print("Clustering features: ")
cluster_vars_df <- data.frame(abs.cor.y = abs(cor(
    glbObsAll[glbObsAll$.src == "Train", cluster_vars],
    as.numeric(glbObsAll[glbObsAll$.src == "Train", glb_rsp_var]),
    use = "pairwise.complete.obs")))
# Drop features whose correlation is undefined; order by increasing weight
cluster_vars_df <- orderBy(~ abs.cor.y,
                           cluster_vars_df[!is.na(cluster_vars_df$abs.cor.y), ,
                                           drop = FALSE])
print(tail(cluster_vars_df, 5))

# Correlation of the random-noise column with the response, printed as a reference level
print(sprintf(" .rnorm cor: %0.4f",
              cor(glbObsAll[glbObsAll$.src == "Train", ".rnorm"],
                  as.numeric(glbObsAll[glbObsAll$.src == "Train", glb_rsp_var]),
                  use = "pairwise.complete.obs")))

# Entropy of the entropy variable over all observations ...
allobs_ent <- entropy(table(glbObsAll[, glb_cluster_entropy_var]), method = "ML")
print(sprintf("glbObsAll Entropy: %0.4f", allobs_ent))

# ... and within each category (count-weighted mean), as a percentage of the overall value
category_df <- mycompute_entropy_df(obs_df = glbObsAll,
                                    entropy_var = glb_cluster_entropy_var,
                                    by_var = glbFeatsCategory)
print(category_df)
category_ent <- weighted.mean(category_df$.entropy, category_df$.knt)
print(sprintf("glbObsAll$%s Entropy: %0.4f (%0.4f pct)",
              glbFeatsCategory,
              category_ent,
              100 * category_ent / allobs_ent))

# Every observation starts in cluster 1; the per-category loop overwrites this
glbObsAll$.clusterid <- 1
#print(max(table(glbObsAll$myCategory.fctr) / 20))
#stop(here"); glb2Sav()

grp_ids <- sort(unique(glbObsAll[, glbFeatsCategory]))
glb_cluster_size_df_lst <- list()

# One PNG holding a grid with one row per category and two columns
#   (dendrogram | entropy vs. minclustersize); pltIx tracks the current grid row
png(paste0(glb_out_pfx, "FeatsTxtClusters.png"),
    width = 480 * 2, height = 480 * length(grp_ids))
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow = length(grp_ids), ncol = 2)))
pltIx <- 1
# Cluster the observations of each category separately:
#   1. compute weighted cosine distances on the clustering features
#   2. build a Ward hierarchical clustering and plot its dendrogram (grid column 1)
#   3. try several minimum cluster sizes for cutreeDynamic and keep the one with the
#      lowest count-weighted entropy of glb_cluster_entropy_var (plotted in grid column 2)
#   4. write the resulting cluster ids back into glbObsAll$.clusterid
for (grp in grp_ids) {
    # Debugging aid: restrict the run to selected categories
    # if (grep(grp, levels(grp_ids)) <= 6) next
    # if (grep(grp, levels(grp_ids)) > 9) next
    # if (grep(grp, levels(grp_ids)) != 10) next

    print(sprintf("Category: %s", grp))
    ctgry_allobs_df <- glbObsAll[glbObsAll[, glbFeatsCategory] == grp, ]
    if (!inherits(ctgry_allobs_df[, glb_cluster_entropy_var], "factor"))
        ctgry_allobs_df[, glb_cluster_entropy_var] <-
            as.factor(ctgry_allobs_df[, glb_cluster_entropy_var])

    #dstns_dist <- proxy::dist(ctgry_allobs_df[, cluster_vars], method = "cosine")
    dstns_dist <- proxy::dist(ctgry_allobs_df[, row.names(cluster_vars_df)],
                              method = "mywgtdcosine",
                              weights = cluster_vars_df$abs.cor.y)
    # Custom distance functions return a crossdist object
    #dstns_mtrx <- as.matrix(dstns_dist)
    dstns_mtrx <- matrix(as.vector(dstns_dist), nrow=attr(dstns_dist, "dim")[1],
                         dimnames=attr(dstns_dist, "dimnames"))
    dstns_dist <- as.dist(dstns_mtrx)

    # Show the most distant pair. which.max() yields a column-major linear index, so for
    #   this square matrix ceiling(index / ncol) is the column of the maximum; the
    #   distance matrix is expected to be symmetric, hence that index also works as a row.
    print(sprintf("max distance(%0.4f) pair:", max(dstns_mtrx)))
    # print(dim(dstns_mtrx))
    # print(sprintf("which.max: %d", which.max(dstns_mtrx)))
    row_ix <- ceiling(which.max(dstns_mtrx) / ncol(dstns_mtrx))
    col_ix <- which.max(dstns_mtrx[row_ix, ])
    # print(sprintf("row_ix: %d", row_ix)); print(sprintf("col_ix: %d", col_ix));
    # print(dim(ctgry_allobs_df))
    print(ctgry_allobs_df[c(row_ix, col_ix),
        c(glb_id_var, glb_cluster_entropy_var, glbFeatsCategory, glbFeatsText, cluster_vars)])

    # Show the closest pair; the zero diagonal is masked so that an observation is not
    #   paired with itself
    min_dstns_mtrx <- dstns_mtrx
    diag(min_dstns_mtrx) <- 1
    # Float representations issue -2.22e-16 vs. 0.0000
    print(sprintf("min distance(%0.4f) pair:", min(min_dstns_mtrx)))
    row_ix <- ceiling(which.min(min_dstns_mtrx) / ncol(min_dstns_mtrx))
    col_ix <- which.min(min_dstns_mtrx[row_ix, ])
    print(ctgry_allobs_df[c(row_ix, col_ix),
        c(glb_id_var, glb_cluster_entropy_var, glbFeatsCategory, glbFeatsText,
          cluster_vars)])

    set.seed(glb_cluster.seed)
    clusters <- hclust(dstns_dist, method = "ward.D2")
    # Workaround to avoid "Error in cutree(dendro, h = heightcutoff) : the 'height' component of 'tree' is not sorted (increasingly)"
    # When the heights are sorted up to numerical tolerance, round them to remove the
    #   floating-point noise. all.equal() returns a character vector (not FALSE) when
    #   its arguments differ, so it must be wrapped in isTRUE() to be usable in if().
    if (isTRUE(all.equal(clusters$height, sort(clusters$height))))
        clusters$height <- round(clusters$height,6)

    # Dendrogram with one point per observation, colored by the response
    clusters$labels <- ctgry_allobs_df[, glb_id_var]
    clustersDD <- dendro_data(clusters)
    clustersDD$labels[, glb_rsp_var] <- sapply(clustersDD$labels$label, function(id)
        ctgry_allobs_df[id == ctgry_allobs_df[, glb_id_var], glb_rsp_var])
    print(ggdendrogram(clustersDD, rotate = TRUE, size = 2) +
            geom_point(data = clustersDD$labels,
            aes_string(x = "x", color = glb_rsp_var), y = min(clustersDD$segments$y)) +
            coord_flip(ylim = c(min(clustersDD$segments$y),
                                     max(clustersDD$segments$y))) +
            ggtitle(grp),
          vp = viewport(layout.pos.row = pltIx, layout.pos.col = 1))

    # Alternative renderings, kept for reference:
    # clusters$labels <- ctgry_allobs_df[, glb_id_var]
    # clustersDD <- dendro_data(clusters)
    # clustersDD$labels$color <- sapply(clustersDD$labels$label, function(id)
    #     ctgry_allobs_df[id == ctgry_allobs_df[, glb_id_var], glb_rsp_var])
    # print(ggdendrogram(clustersDD, rotate = TRUE, size = 2) +
    #         geom_point(data = clustersDD$labels,
    #         aes_string(x = "x", color = "color"), y = min(clustersDD$segments$y)) +
    #         coord_flip(ylim = c(min(clustersDD$segments$y),
    #                                  max(clustersDD$segments$y))))
    # print(ggdendrogram(clustersDD, rotate = TRUE, size = 2) +
    #         geom_point(data = clustersDD$labels,
    #         aes_string(x = "x", y = "y", color = "color")))
    # myplclust(clusters, lab=ctgry_allobs_df[, glb_id_var],
    #           lab.col=unclass(ctgry_allobs_df[, glb_cluster_entropy_var]))

    # Baseline row: the whole category treated as a single cluster
    opt_minclustersize_df <- data.frame(minclustersize = nrow(ctgry_allobs_df),
        entropy = entropy(table(ctgry_allobs_df[, glb_cluster_entropy_var]),
                          method = "ML"))
    # Candidate minimum cluster sizes: 5 values from 1/2 down to 1/10 of the category size
    for (minclustersize in
         as.integer(seq(nrow(ctgry_allobs_df) / 2, nrow(ctgry_allobs_df) / 10,
                        length.out = 5))) {
        clusterGroups <- cutreeDynamic(clusters, minClusterSize = minclustersize,
                                       method = "tree", deepSplit = 0)
        # Unassigned groups are labeled 0; the largest group has label 1
        clusterGroups[clusterGroups == 0] <- 1
        ctgry_allobs_df$.clusterid <- clusterGroups
        ctgry_clstrs_df <- mycompute_entropy_df(ctgry_allobs_df,
                                                glb_cluster_entropy_var)
        opt_minclustersize_df <- rbind(opt_minclustersize_df,
            data.frame(minclustersize = minclustersize,
                entropy = weighted.mean(ctgry_clstrs_df$.entropy, ctgry_clstrs_df$.knt)))
    }
    opt_minclustersize <-
        opt_minclustersize_df$minclustersize[which.min(opt_minclustersize_df$entropy)]
    # Highlight the selected size in red
    opt_minclustersize_df$.color <-
        ifelse(opt_minclustersize_df$minclustersize == opt_minclustersize,
               "red", "blue")
    print(ggplot(data = opt_minclustersize_df,
                 mapping = aes(x = minclustersize, y = entropy)) +
            geom_point(aes(color = .color)) + scale_color_identity() +
            guides(color = "none") + geom_line(),
          vp = viewport(layout.pos.row = pltIx, layout.pos.col = 2))
    glb_cluster_size_df_lst[[grp]] <- opt_minclustersize_df

    # select minclustersize that minimizes entropy
    clusterGroups <- cutreeDynamic(clusters, minClusterSize = opt_minclustersize,
                                   method = "tree",
                                   deepSplit = 0)
    # Unassigned groups are labeled 0; the largest group has label 1
    # Values are not auto-printed inside a for loop, so the tables are printed
    #   explicitly: before and after folding the unassigned label 0 into cluster 1
    print(table(clusterGroups, ctgry_allobs_df[, glb_cluster_entropy_var],
                useNA = "ifany"))
    clusterGroups[clusterGroups == 0] <- 1
    print(table(clusterGroups, ctgry_allobs_df[, glb_cluster_entropy_var],
                useNA = "ifany"))
    glbObsAll[glbObsAll[, glbFeatsCategory] == grp,]$.clusterid <-
        clusterGroups

    pltIx <- pltIx + 1
}
# Close the graphics device that is currently active
dev.off()
#all.equal(savObsAll_clusterid, glbObsAll$.clusterid)

# Entropy within category x cluster (count-weighted mean), reported as a percentage of
#   the category-only entropy
cluster_df <- mycompute_entropy_df(obs_df = glbObsAll,
                                   entropy_var = glb_cluster_entropy_var,
                                   by_var = glbFeatsCategory)
print(cluster_df)
cluster_ent <- weighted.mean(cluster_df$.entropy, cluster_df$.knt)
print(sprintf("glbObsAll$%s$.clusterid Entropy: %0.4f (%0.4f pct)",
              glbFeatsCategory,
              cluster_ent,
              100 * cluster_ent / category_ent))

glbObsAll$.clusterid.fctr <- as.factor(glbObsAll$.clusterid)
# .clusterid.fctr is created automatically (probably ?) later
# The numeric id is excluded from the features
glbFeatsExclude <- c(glbFeatsExclude, ".clusterid")

# Record the category factor (name with a ".fctr" suffix, added when missing) under the
#   ".clusterid.fctr" key of glbFeatsInteractionOnly.
# Earlier variant, with key and value swapped:
#     glbFeatsInteractionOnly[ifelse(grepl("\\.fctr", glbFeatsCategory),
#                                    glbFeatsCategory,
#                                    paste0(glbFeatsCategory, ".fctr"))] <-
#         c(".clusterid.fctr")
if (!is.null(glbFeatsCategory)) {
    glbFeatsInteractionOnly[[".clusterid.fctr"]] <-
        ifelse(grepl("\\.fctr", glbFeatsCategory),
               glbFeatsCategory,
               paste0(glbFeatsCategory, ".fctr"))
}

# Optionally exclude the raw clustering features as well
if (glbFeatsTextClusterVarsExclude) {
    glbFeatsExclude <- c(glbFeatsExclude, cluster_vars)
}
}
# Last call for data modifications
#stop(here") # savObsAll <- glbObsAll
# glbObsAll[(glbObsAll$PropR == 0.75) & (glbObsAll$State == "Hawaii"), "PropR.fctr"] <- "N"
# Re-partition: rebuild the Train and Test subsets from glbObsAll (by its .src column)
#   so that they are derived from the current state of glbObsAll
glbObsTrn <- subset(glbObsAll, .src == "Train")
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
glbObsNew <- subset(glbObsAll, .src == "Test")
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
# Log the start of the "partition.data.training" chunk as a new major step
glb_chunks_df <- myadd_chunk(glb_chunks_df, "partition.data.training", major.inc = TRUE)
## label step_major step_minor label_minor bgn end
## 7 cluster.data 3 2 2 20.595 20.915
## 8 partition.data.training 4 0 0 20.916 NA
## elapsed
## 7 0.32
## 8 NA
4.0: partition data training
## [1] "Prediction Hints by Catgeory:"
## NDSSName.my.fctr Popular.0 Popular.1 .n.tst .strata.0 .strata.1
## 5 #U.S.#Education 325 NA 89 82 17
## 10 Culture## 1 NA 70 1 13
## 12 Foreign#World# 172 NA 47 44 9
## 21 myOther 38 NA 5 5 1
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
## Loading required package: sampling
##
## Attaching package: 'sampling'
##
## The following objects are masked from 'package:survival':
##
## cluster, strata
##
## The following object is masked from 'package:caret':
##
## cluster
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
## Popular.0 Popular.1 Popular.NA
## NA NA 1870
## Fit 3941 863 NA
## OOB 1498 230 NA
## Popular.0 Popular.1 Popular.NA
## NA NA 1
## Fit 0.8203580 0.1796420 NA
## OOB 0.8668981 0.1331019 NA
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
## NDSSName.my.fctr .n.Fit .n.OOB .n.Tst .freqRatio.Fit
## 1 ## 913 371 342 0.190049958
## 6 Business#BusinessDay#Dealbook 629 323 304 0.130932556
## 11 Culture#Arts# 490 185 174 0.101998335
## 15 OpEd#Opinion# 437 89 164 0.090965862
## 9 Business#Technology# 213 126 114 0.044338052
## 19 TStyle## 623 101 105 0.129683597
## 5 #U.S.#Education 243 82 89 0.050582848
## 10 Culture## NA 1 70 NA
## 14 Metro#N.Y./Region# 128 70 67 0.026644463
## 18 Styles#U.S.# 127 50 61 0.026436303
## 16 Science#Health# 148 48 57 0.030807660
## 13 Foreign#World#AsiaPacific 150 53 56 0.031223980
## 2 #Multimedia# 92 49 52 0.019150708
## 12 Foreign#World# 128 44 47 0.026644463
## 8 Business#Crosswords/Games# 105 18 42 0.021856786
## 7 Business#BusinessDay#SmallBusiness 100 40 41 0.020815987
## 20 Travel#Travel# 83 34 35 0.017277269
## 3 #Opinion#RoomForDebate 42 20 20 0.008742714
## 17 Styles##Fashion 104 15 15 0.021648626
## 4 #Opinion#ThePublicEditor 16 4 10 0.003330558
## 21 myOther 33 5 5 0.006869276
## .freqRatio.OOB .freqRatio.Tst
## 1 0.2146990741 0.182887701
## 6 0.1869212963 0.162566845
## 11 0.1070601852 0.093048128
## 15 0.0515046296 0.087700535
## 9 0.0729166667 0.060962567
## 19 0.0584490741 0.056149733
## 5 0.0474537037 0.047593583
## 10 0.0005787037 0.037433155
## 14 0.0405092593 0.035828877
## 18 0.0289351852 0.032620321
## 16 0.0277777778 0.030481283
## 13 0.0306712963 0.029946524
## 2 0.0283564815 0.027807487
## 12 0.0254629630 0.025133690
## 8 0.0104166667 0.022459893
## 7 0.0231481481 0.021925134
## 20 0.0196759259 0.018716578
## 3 0.0115740741 0.010695187
## 17 0.0086805556 0.008021390
## 4 0.0023148148 0.005347594
## 21 0.0028935185 0.002673797
## [1] "glbObsAll: "
## [1] 8402 53
## [1] "glbObsTrn: "
## [1] 6532 53
## [1] "glbObsFit: "
## [1] 4804 52
## [1] "glbObsOOB: "
## [1] 1728 52
## [1] "glbObsNew: "
## [1] 1870 52
## Warning in rm(split): object 'split' not found
## label step_major step_minor label_minor bgn end
## 8 partition.data.training 4 0 0 20.916 22.267
## 9 select.features 5 0 0 22.267 NA
## elapsed
## 8 1.351
## 9 NA
5.0: select features
## Warning in cor(data.matrix(entity_df[, sel_feats]), y =
## as.numeric(entity_df[, : the standard deviation is zero
## id cor.y
## Popular Popular 1.000000000
## WordCount.root2 WordCount.root2 0.292120679
## WordCount WordCount 0.257526549
## WordCount.log1p WordCount.log1p 0.254319628
## NDSSName.my.fctr NDSSName.my.fctr 0.165445970
## PubDate.day.minutes PubDate.day.minutes 0.156753478
## PubDate.day.minutes.poly.1 PubDate.day.minutes.poly.1 0.156753478
## PubDate.hour.fctr PubDate.hour.fctr 0.135436805
## PubDate.wkend PubDate.wkend 0.104707290
## PubDate.day.minutes.poly.4 PubDate.day.minutes.poly.4 0.073941394
## PubDate.day.minutes.poly.2 PubDate.day.minutes.poly.2 0.070977720
## PubDate.last4.log1p PubDate.last4.log1p 0.066473282
## PubDate.last2.log1p PubDate.last2.log1p 0.063068716
## PubDate.day.minutes.poly.5 PubDate.day.minutes.poly.5 -0.055929231
## PubDate.last8.log1p PubDate.last8.log1p 0.054458821
## WordCount.nexp WordCount.nexp -0.053208396
## PubDate.last16.log1p PubDate.last16.log1p 0.040735543
## PubDate.wkday.fctr PubDate.wkday.fctr -0.039801288
## PubDate.minute.fctr PubDate.minute.fctr -0.034073846
## PubDate.day.minutes.poly.3 PubDate.day.minutes.poly.3 0.027983551
## PubDate.zoo.ctg PubDate.zoo.ctg 0.022782795
## PubDate.month.fctr PubDate.month.fctr 0.019148739
## PubDate.POSIX PubDate.POSIX 0.015683258
## PubDate.last32.log1p.ctg PubDate.last32.log1p.ctg 0.015395971
## PubDate.day.minutes.poly.3.ctg PubDate.day.minutes.poly.3.ctg 0.014982807
## PubDate.hlday PubDate.hlday 0.014690122
## PubDate.day.minutes.poly.4.ctg PubDate.day.minutes.poly.4.ctg 0.014601521
## PubDate.day.minutes.poly.5.ctg PubDate.day.minutes.poly.5.ctg 0.014574775
## PubDate.juliandate PubDate.juliandate 0.014361075
## PubDate.zoo PubDate.zoo 0.013260902
## PubDate.second.fctr PubDate.second.fctr -0.011879458
## UniqueID UniqueID 0.011824920
## PubDate.date.fctr PubDate.date.fctr -0.011647558
## .rnorm .rnorm 0.008212201
## PubDate.last16.log1p.ctg PubDate.last16.log1p.ctg 0.007783530
## PubDate.last2.log1p.ctg PubDate.last2.log1p.ctg 0.006916600
## PubDate.last4.log1p.ctg PubDate.last4.log1p.ctg 0.004792781
## PubDate.last8.log1p.ctg PubDate.last8.log1p.ctg 0.003914960
## PubDate.day.minutes.poly.2.ctg PubDate.day.minutes.poly.2.ctg 0.003596414
## PubDate.last32.log1p PubDate.last32.log1p 0.003558081
## PubDate.day.minutes.poly.1.ctg PubDate.day.minutes.poly.1.ctg -0.002432289
## PubDate.year.fctr PubDate.year.fctr NA
## exclude.as.feat cor.y.abs
## Popular 1 1.000000000
## WordCount.root2 0 0.292120679
## WordCount 1 0.257526549
## WordCount.log1p 0 0.254319628
## NDSSName.my.fctr 0 0.165445970
## PubDate.day.minutes 1 0.156753478
## PubDate.day.minutes.poly.1 0 0.156753478
## PubDate.hour.fctr 0 0.135436805
## PubDate.wkend 0 0.104707290
## PubDate.day.minutes.poly.4 0 0.073941394
## PubDate.day.minutes.poly.2 0 0.070977720
## PubDate.last4.log1p 0 0.066473282
## PubDate.last2.log1p 0 0.063068716
## PubDate.day.minutes.poly.5 0 0.055929231
## PubDate.last8.log1p 0 0.054458821
## WordCount.nexp 0 0.053208396
## PubDate.last16.log1p 0 0.040735543
## PubDate.wkday.fctr 0 0.039801288
## PubDate.minute.fctr 0 0.034073846
## PubDate.day.minutes.poly.3 0 0.027983551
## PubDate.zoo.ctg 1 0.022782795
## PubDate.month.fctr 0 0.019148739
## PubDate.POSIX 1 0.015683258
## PubDate.last32.log1p.ctg 0 0.015395971
## PubDate.day.minutes.poly.3.ctg 0 0.014982807
## PubDate.hlday 0 0.014690122
## PubDate.day.minutes.poly.4.ctg 0 0.014601521
## PubDate.day.minutes.poly.5.ctg 0 0.014574775
## PubDate.juliandate 0 0.014361075
## PubDate.zoo 1 0.013260902
## PubDate.second.fctr 0 0.011879458
## UniqueID 1 0.011824920
## PubDate.date.fctr 0 0.011647558
## .rnorm 0 0.008212201
## PubDate.last16.log1p.ctg 0 0.007783530
## PubDate.last2.log1p.ctg 0 0.006916600
## PubDate.last4.log1p.ctg 0 0.004792781
## PubDate.last8.log1p.ctg 0 0.003914960
## PubDate.day.minutes.poly.2.ctg 0 0.003596414
## PubDate.last32.log1p 0 0.003558081
## PubDate.day.minutes.poly.1.ctg 0 0.002432289
## PubDate.year.fctr 0 NA
## [1] "cor(PubDate.juliandate, PubDate.month.fctr)=0.9393"
## [1] "cor(Popular.fctr, PubDate.juliandate)=0.0144"
## [1] "cor(Popular.fctr, PubDate.month.fctr)=0.0191"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified PubDate.juliandate as highly correlated with
## PubDate.month.fctr
## [1] "cor(PubDate.day.minutes.poly.1, PubDate.hour.fctr)=0.9026"
## [1] "cor(Popular.fctr, PubDate.day.minutes.poly.1)=0.1568"
## [1] "cor(Popular.fctr, PubDate.hour.fctr)=0.1354"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified PubDate.hour.fctr as highly correlated with
## PubDate.day.minutes.poly.1
## [1] "cor(WordCount.log1p, WordCount.root2)=0.8906"
## [1] "cor(Popular.fctr, WordCount.log1p)=0.2543"
## [1] "cor(Popular.fctr, WordCount.root2)=0.2921"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified WordCount.log1p as highly correlated with
## WordCount.root2
## [1] "cor(PubDate.last4.log1p, PubDate.last8.log1p)=0.8253"
## [1] "cor(Popular.fctr, PubDate.last4.log1p)=0.0665"
## [1] "cor(Popular.fctr, PubDate.last8.log1p)=0.0545"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified PubDate.last8.log1p as highly correlated with
## PubDate.last4.log1p
## [1] "cor(PubDate.last2.log1p, PubDate.last4.log1p)=0.7598"
## [1] "cor(Popular.fctr, PubDate.last2.log1p)=0.0631"
## [1] "cor(Popular.fctr, PubDate.last4.log1p)=0.0665"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glbObsTrn, : Identified PubDate.last2.log1p as highly correlated with
## PubDate.last4.log1p
## id cor.y
## Popular Popular 1.000000000
## WordCount.root2 WordCount.root2 0.292120679
## WordCount WordCount 0.257526549
## WordCount.log1p WordCount.log1p 0.254319628
## NDSSName.my.fctr NDSSName.my.fctr 0.165445970
## PubDate.day.minutes PubDate.day.minutes 0.156753478
## PubDate.day.minutes.poly.1 PubDate.day.minutes.poly.1 0.156753478
## PubDate.hour.fctr PubDate.hour.fctr 0.135436805
## PubDate.wkend PubDate.wkend 0.104707290
## PubDate.day.minutes.poly.4 PubDate.day.minutes.poly.4 0.073941394
## PubDate.day.minutes.poly.2 PubDate.day.minutes.poly.2 0.070977720
## PubDate.last4.log1p PubDate.last4.log1p 0.066473282
## PubDate.last2.log1p PubDate.last2.log1p 0.063068716
## PubDate.last8.log1p PubDate.last8.log1p 0.054458821
## PubDate.last16.log1p PubDate.last16.log1p 0.040735543
## PubDate.day.minutes.poly.3 PubDate.day.minutes.poly.3 0.027983551
## PubDate.zoo.ctg PubDate.zoo.ctg 0.022782795
## PubDate.month.fctr PubDate.month.fctr 0.019148739
## PubDate.POSIX PubDate.POSIX 0.015683258
## PubDate.last32.log1p.ctg PubDate.last32.log1p.ctg 0.015395971
## PubDate.day.minutes.poly.3.ctg PubDate.day.minutes.poly.3.ctg 0.014982807
## PubDate.hlday PubDate.hlday 0.014690122
## PubDate.day.minutes.poly.4.ctg PubDate.day.minutes.poly.4.ctg 0.014601521
## PubDate.day.minutes.poly.5.ctg PubDate.day.minutes.poly.5.ctg 0.014574775
## PubDate.juliandate PubDate.juliandate 0.014361075
## PubDate.zoo PubDate.zoo 0.013260902
## UniqueID UniqueID 0.011824920
## .rnorm .rnorm 0.008212201
## PubDate.last16.log1p.ctg PubDate.last16.log1p.ctg 0.007783530
## PubDate.last2.log1p.ctg PubDate.last2.log1p.ctg 0.006916600
## PubDate.last4.log1p.ctg PubDate.last4.log1p.ctg 0.004792781
## PubDate.last8.log1p.ctg PubDate.last8.log1p.ctg 0.003914960
## PubDate.day.minutes.poly.2.ctg PubDate.day.minutes.poly.2.ctg 0.003596414
## PubDate.last32.log1p PubDate.last32.log1p 0.003558081
## PubDate.day.minutes.poly.1.ctg PubDate.day.minutes.poly.1.ctg -0.002432289
## PubDate.date.fctr PubDate.date.fctr -0.011647558
## PubDate.second.fctr PubDate.second.fctr -0.011879458
## PubDate.minute.fctr PubDate.minute.fctr -0.034073846
## PubDate.wkday.fctr PubDate.wkday.fctr -0.039801288
## WordCount.nexp WordCount.nexp -0.053208396
## PubDate.day.minutes.poly.5 PubDate.day.minutes.poly.5 -0.055929231
## PubDate.year.fctr PubDate.year.fctr NA
## exclude.as.feat cor.y.abs
## Popular 1 1.000000000
## WordCount.root2 0 0.292120679
## WordCount 1 0.257526549
## WordCount.log1p 0 0.254319628
## NDSSName.my.fctr 0 0.165445970
## PubDate.day.minutes 1 0.156753478
## PubDate.day.minutes.poly.1 0 0.156753478
## PubDate.hour.fctr 0 0.135436805
## PubDate.wkend 0 0.104707290
## PubDate.day.minutes.poly.4 0 0.073941394
## PubDate.day.minutes.poly.2 0 0.070977720
## PubDate.last4.log1p 0 0.066473282
## PubDate.last2.log1p 0 0.063068716
## PubDate.last8.log1p 0 0.054458821
## PubDate.last16.log1p 0 0.040735543
## PubDate.day.minutes.poly.3 0 0.027983551
## PubDate.zoo.ctg 1 0.022782795
## PubDate.month.fctr 0 0.019148739
## PubDate.POSIX 1 0.015683258
## PubDate.last32.log1p.ctg 0 0.015395971
## PubDate.day.minutes.poly.3.ctg 0 0.014982807
## PubDate.hlday 0 0.014690122
## PubDate.day.minutes.poly.4.ctg 0 0.014601521
## PubDate.day.minutes.poly.5.ctg 0 0.014574775
## PubDate.juliandate 0 0.014361075
## PubDate.zoo 1 0.013260902
## UniqueID 1 0.011824920
## .rnorm 0 0.008212201
## PubDate.last16.log1p.ctg 0 0.007783530
## PubDate.last2.log1p.ctg 0 0.006916600
## PubDate.last4.log1p.ctg 0 0.004792781
## PubDate.last8.log1p.ctg 0 0.003914960
## PubDate.day.minutes.poly.2.ctg 0 0.003596414
## PubDate.last32.log1p 0 0.003558081
## PubDate.day.minutes.poly.1.ctg 0 0.002432289
## PubDate.date.fctr 0 0.011647558
## PubDate.second.fctr 0 0.011879458
## PubDate.minute.fctr 0 0.034073846
## PubDate.wkday.fctr 0 0.039801288
## WordCount.nexp 0 0.053208396
## PubDate.day.minutes.poly.5 0 0.055929231
## PubDate.year.fctr 0 NA
## cor.high.X freqRatio
## Popular <NA> 4.976212
## WordCount.root2 <NA> 2.315789
## WordCount <NA> 2.315789
## WordCount.log1p WordCount.root2 2.315789
## NDSSName.my.fctr <NA> 1.348739
## PubDate.day.minutes <NA> 1.225490
## PubDate.day.minutes.poly.1 <NA> 1.225490
## PubDate.hour.fctr PubDate.day.minutes.poly.1 1.835040
## PubDate.wkend <NA> 12.011952
## PubDate.day.minutes.poly.4 <NA> 1.225490
## PubDate.day.minutes.poly.2 <NA> 1.225490
## PubDate.last4.log1p <NA> 1.125000
## PubDate.last2.log1p PubDate.last4.log1p 1.375000
## PubDate.last8.log1p PubDate.last4.log1p 1.142857
## PubDate.last16.log1p <NA> 3.200000
## PubDate.day.minutes.poly.3 <NA> 1.225490
## PubDate.zoo.ctg <NA> 1.000000
## PubDate.month.fctr <NA> 1.017514
## PubDate.POSIX <NA> 1.000000
## PubDate.last32.log1p.ctg <NA> 239.000000
## PubDate.day.minutes.poly.3.ctg <NA> 1.083333
## PubDate.hlday <NA> 28.160714
## PubDate.day.minutes.poly.4.ctg <NA> 1.083333
## PubDate.day.minutes.poly.5.ctg <NA> 1.083333
## PubDate.juliandate PubDate.month.fctr 1.032520
## PubDate.zoo <NA> 1.000000
## UniqueID <NA> 1.000000
## .rnorm <NA> 1.000000
## PubDate.last16.log1p.ctg <NA> 60.000000
## PubDate.last2.log1p.ctg <NA> 5.000000
## PubDate.last4.log1p.ctg <NA> 20.000000
## PubDate.last8.log1p.ctg <NA> 40.000000
## PubDate.day.minutes.poly.2.ctg <NA> 1.083333
## PubDate.last32.log1p <NA> 8.000000
## PubDate.day.minutes.poly.1.ctg <NA> 1.083333
## PubDate.date.fctr <NA> 1.021394
## PubDate.second.fctr <NA> 1.018204
## PubDate.minute.fctr <NA> 1.483365
## PubDate.wkday.fctr <NA> 1.003268
## WordCount.nexp <NA> 17.761364
## PubDate.day.minutes.poly.5 <NA> 1.225490
## PubDate.year.fctr <NA> 0.000000
## percentUnique zeroVar nzv
## Popular 0.03061849 FALSE FALSE
## WordCount.root2 24.15799143 FALSE FALSE
## WordCount 24.15799143 FALSE FALSE
## WordCount.log1p 24.15799143 FALSE FALSE
## NDSSName.my.fctr 0.32149418 FALSE FALSE
## PubDate.day.minutes 18.08022045 FALSE FALSE
## PubDate.day.minutes.poly.1 18.08022045 FALSE FALSE
## PubDate.hour.fctr 0.04592774 FALSE FALSE
## PubDate.wkend 0.03061849 FALSE FALSE
## PubDate.day.minutes.poly.4 18.08022045 FALSE FALSE
## PubDate.day.minutes.poly.2 18.08022045 FALSE FALSE
## PubDate.last4.log1p 64.98775260 FALSE FALSE
## PubDate.last2.log1p 51.17881200 FALSE FALSE
## PubDate.last8.log1p 75.12247397 FALSE FALSE
## PubDate.last16.log1p 84.44580527 FALSE FALSE
## PubDate.day.minutes.poly.3 18.08022045 FALSE FALSE
## PubDate.zoo.ctg 99.92345377 FALSE FALSE
## PubDate.month.fctr 0.04592774 FALSE FALSE
## PubDate.POSIX 99.86221678 FALSE FALSE
## PubDate.last32.log1p.ctg 92.11573791 FALSE FALSE
## PubDate.day.minutes.poly.3.ctg 53.96509492 FALSE FALSE
## PubDate.hlday 0.03061849 FALSE TRUE
## PubDate.day.minutes.poly.4.ctg 53.94978567 FALSE FALSE
## PubDate.day.minutes.poly.5.ctg 53.94978567 FALSE FALSE
## PubDate.juliandate 1.39314146 FALSE FALSE
## PubDate.zoo 99.86221678 FALSE FALSE
## UniqueID 100.00000000 FALSE FALSE
## .rnorm 100.00000000 FALSE FALSE
## PubDate.last16.log1p.ctg 95.17758726 FALSE FALSE
## PubDate.last2.log1p.ctg 92.19228414 FALSE FALSE
## PubDate.last4.log1p.ctg 95.88181261 FALSE FALSE
## PubDate.last8.log1p.ctg 96.41763625 FALSE FALSE
## PubDate.day.minutes.poly.2.ctg 53.94978567 FALSE FALSE
## PubDate.last32.log1p 90.99816289 FALSE FALSE
## PubDate.day.minutes.poly.1.ctg 53.96509492 FALSE FALSE
## PubDate.date.fctr 0.07654623 FALSE FALSE
## PubDate.second.fctr 0.06123699 FALSE FALSE
## PubDate.minute.fctr 0.06123699 FALSE FALSE
## PubDate.wkday.fctr 0.10716473 FALSE FALSE
## WordCount.nexp 11.32884262 FALSE FALSE
## PubDate.day.minutes.poly.5 18.08022045 FALSE FALSE
## PubDate.year.fctr 0.01530925 TRUE TRUE
## is.cor.y.abs.low
## Popular FALSE
## WordCount.root2 FALSE
## WordCount FALSE
## WordCount.log1p FALSE
## NDSSName.my.fctr FALSE
## PubDate.day.minutes FALSE
## PubDate.day.minutes.poly.1 FALSE
## PubDate.hour.fctr FALSE
## PubDate.wkend FALSE
## PubDate.day.minutes.poly.4 FALSE
## PubDate.day.minutes.poly.2 FALSE
## PubDate.last4.log1p FALSE
## PubDate.last2.log1p FALSE
## PubDate.last8.log1p FALSE
## PubDate.last16.log1p FALSE
## PubDate.day.minutes.poly.3 FALSE
## PubDate.zoo.ctg FALSE
## PubDate.month.fctr FALSE
## PubDate.POSIX FALSE
## PubDate.last32.log1p.ctg FALSE
## PubDate.day.minutes.poly.3.ctg FALSE
## PubDate.hlday FALSE
## PubDate.day.minutes.poly.4.ctg FALSE
## PubDate.day.minutes.poly.5.ctg FALSE
## PubDate.juliandate FALSE
## PubDate.zoo FALSE
## UniqueID FALSE
## .rnorm FALSE
## PubDate.last16.log1p.ctg TRUE
## PubDate.last2.log1p.ctg TRUE
## PubDate.last4.log1p.ctg TRUE
## PubDate.last8.log1p.ctg TRUE
## PubDate.day.minutes.poly.2.ctg TRUE
## PubDate.last32.log1p TRUE
## PubDate.day.minutes.poly.1.ctg TRUE
## PubDate.date.fctr FALSE
## PubDate.second.fctr FALSE
## PubDate.minute.fctr FALSE
## PubDate.wkday.fctr FALSE
## WordCount.nexp FALSE
## PubDate.day.minutes.poly.5 FALSE
## PubDate.year.fctr NA
## Warning in myplot_scatter(plt_feats_df, "percentUnique", "freqRatio",
## colorcol_name = "nzv", : converting nzv to class:factor
## Warning: Removed 20 rows containing missing values (geom_point).
## Warning: Removed 20 rows containing missing values (geom_point).
## Warning: Removed 20 rows containing missing values (geom_point).
## id cor.y exclude.as.feat cor.y.abs
## PubDate.hlday PubDate.hlday 0.01469012 0 0.01469012
## PubDate.year.fctr PubDate.year.fctr NA 0 NA
## cor.high.X freqRatio percentUnique zeroVar nzv
## PubDate.hlday <NA> 28.16071 0.03061849 FALSE TRUE
## PubDate.year.fctr <NA> 0.00000 0.01530925 TRUE TRUE
## is.cor.y.abs.low
## PubDate.hlday FALSE
## PubDate.year.fctr NA
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
## +(rfe) fit Fold1.Rep1 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold1.Rep1 size: 60
## +(rfe) imp Fold1.Rep1
## -(rfe) imp Fold1.Rep1
## +(rfe) fit Fold1.Rep1 size: 32
## -(rfe) fit Fold1.Rep1 size: 32
## +(rfe) fit Fold1.Rep1 size: 16
## -(rfe) fit Fold1.Rep1 size: 16
## +(rfe) fit Fold1.Rep1 size: 8
## -(rfe) fit Fold1.Rep1 size: 8
## +(rfe) fit Fold1.Rep1 size: 4
## -(rfe) fit Fold1.Rep1 size: 4
## +(rfe) fit Fold1.Rep1 size: 2
## -(rfe) fit Fold1.Rep1 size: 2
## +(rfe) fit Fold2.Rep1 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold2.Rep1 size: 60
## +(rfe) imp Fold2.Rep1
## -(rfe) imp Fold2.Rep1
## +(rfe) fit Fold2.Rep1 size: 32
## -(rfe) fit Fold2.Rep1 size: 32
## +(rfe) fit Fold2.Rep1 size: 16
## -(rfe) fit Fold2.Rep1 size: 16
## +(rfe) fit Fold2.Rep1 size: 8
## -(rfe) fit Fold2.Rep1 size: 8
## +(rfe) fit Fold2.Rep1 size: 4
## -(rfe) fit Fold2.Rep1 size: 4
## +(rfe) fit Fold2.Rep1 size: 2
## -(rfe) fit Fold2.Rep1 size: 2
## +(rfe) fit Fold3.Rep1 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold3.Rep1 size: 60
## +(rfe) imp Fold3.Rep1
## -(rfe) imp Fold3.Rep1
## +(rfe) fit Fold3.Rep1 size: 32
## -(rfe) fit Fold3.Rep1 size: 32
## +(rfe) fit Fold3.Rep1 size: 16
## -(rfe) fit Fold3.Rep1 size: 16
## +(rfe) fit Fold3.Rep1 size: 8
## -(rfe) fit Fold3.Rep1 size: 8
## +(rfe) fit Fold3.Rep1 size: 4
## -(rfe) fit Fold3.Rep1 size: 4
## +(rfe) fit Fold3.Rep1 size: 2
## -(rfe) fit Fold3.Rep1 size: 2
## +(rfe) fit Fold1.Rep2 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold1.Rep2 size: 60
## +(rfe) imp Fold1.Rep2
## -(rfe) imp Fold1.Rep2
## +(rfe) fit Fold1.Rep2 size: 32
## -(rfe) fit Fold1.Rep2 size: 32
## +(rfe) fit Fold1.Rep2 size: 16
## -(rfe) fit Fold1.Rep2 size: 16
## +(rfe) fit Fold1.Rep2 size: 8
## -(rfe) fit Fold1.Rep2 size: 8
## +(rfe) fit Fold1.Rep2 size: 4
## -(rfe) fit Fold1.Rep2 size: 4
## +(rfe) fit Fold1.Rep2 size: 2
## -(rfe) fit Fold1.Rep2 size: 2
## +(rfe) fit Fold2.Rep2 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold2.Rep2 size: 60
## +(rfe) imp Fold2.Rep2
## -(rfe) imp Fold2.Rep2
## +(rfe) fit Fold2.Rep2 size: 32
## -(rfe) fit Fold2.Rep2 size: 32
## +(rfe) fit Fold2.Rep2 size: 16
## -(rfe) fit Fold2.Rep2 size: 16
## +(rfe) fit Fold2.Rep2 size: 8
## -(rfe) fit Fold2.Rep2 size: 8
## +(rfe) fit Fold2.Rep2 size: 4
## -(rfe) fit Fold2.Rep2 size: 4
## +(rfe) fit Fold2.Rep2 size: 2
## -(rfe) fit Fold2.Rep2 size: 2
## +(rfe) fit Fold3.Rep2 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold3.Rep2 size: 60
## +(rfe) imp Fold3.Rep2
## -(rfe) imp Fold3.Rep2
## +(rfe) fit Fold3.Rep2 size: 32
## -(rfe) fit Fold3.Rep2 size: 32
## +(rfe) fit Fold3.Rep2 size: 16
## -(rfe) fit Fold3.Rep2 size: 16
## +(rfe) fit Fold3.Rep2 size: 8
## -(rfe) fit Fold3.Rep2 size: 8
## +(rfe) fit Fold3.Rep2 size: 4
## -(rfe) fit Fold3.Rep2 size: 4
## +(rfe) fit Fold3.Rep2 size: 2
## -(rfe) fit Fold3.Rep2 size: 2
## +(rfe) fit Fold1.Rep3 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold1.Rep3 size: 60
## +(rfe) imp Fold1.Rep3
## -(rfe) imp Fold1.Rep3
## +(rfe) fit Fold1.Rep3 size: 32
## -(rfe) fit Fold1.Rep3 size: 32
## +(rfe) fit Fold1.Rep3 size: 16
## -(rfe) fit Fold1.Rep3 size: 16
## +(rfe) fit Fold1.Rep3 size: 8
## -(rfe) fit Fold1.Rep3 size: 8
## +(rfe) fit Fold1.Rep3 size: 4
## -(rfe) fit Fold1.Rep3 size: 4
## +(rfe) fit Fold1.Rep3 size: 2
## -(rfe) fit Fold1.Rep3 size: 2
## +(rfe) fit Fold2.Rep3 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold2.Rep3 size: 60
## +(rfe) imp Fold2.Rep3
## -(rfe) imp Fold2.Rep3
## +(rfe) fit Fold2.Rep3 size: 32
## -(rfe) fit Fold2.Rep3 size: 32
## +(rfe) fit Fold2.Rep3 size: 16
## -(rfe) fit Fold2.Rep3 size: 16
## +(rfe) fit Fold2.Rep3 size: 8
## -(rfe) fit Fold2.Rep3 size: 8
## +(rfe) fit Fold2.Rep3 size: 4
## -(rfe) fit Fold2.Rep3 size: 4
## +(rfe) fit Fold2.Rep3 size: 2
## -(rfe) fit Fold2.Rep3 size: 2
## +(rfe) fit Fold3.Rep3 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold3.Rep3 size: 60
## +(rfe) imp Fold3.Rep3
## -(rfe) imp Fold3.Rep3
## +(rfe) fit Fold3.Rep3 size: 32
## -(rfe) fit Fold3.Rep3 size: 32
## +(rfe) fit Fold3.Rep3 size: 16
## -(rfe) fit Fold3.Rep3 size: 16
## +(rfe) fit Fold3.Rep3 size: 8
## -(rfe) fit Fold3.Rep3 size: 8
## +(rfe) fit Fold3.Rep3 size: 4
## -(rfe) fit Fold3.Rep3 size: 4
## +(rfe) fit Fold3.Rep3 size: 2
## -(rfe) fit Fold3.Rep3 size: 2
## Warning in lda.default(x, grouping, ...): variables are collinear
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (3 fold, repeated 3 times)
##
## Resampling performance over subset size:
##
## Variables Accuracy Kappa AccuracySD KappaSD Selected
## 2 0.8096 0.03708 0.005112 0.01788
## 4 0.8864 0.52518 0.005536 0.02625
## 8 0.8945 0.57850 0.013874 0.06859
## 16 0.9304 0.75912 0.004608 0.01694
## 32 0.9305 0.75960 0.004552 0.01678
## 60 0.9326 0.76882 0.004814 0.01705 *
##
## The top 5 variables (out of 60):
## WordCount.log1p, WordCount.root2, WordCount.nexp, NDSSName.my.fctrOpEd#Opinion#, PubDate.day.minutes.poly.1
##
## [1] "WordCount.log1p"
## [2] "WordCount.root2"
## [3] "WordCount.nexp"
## [4] "NDSSName.my.fctrOpEd#Opinion#"
## [5] "PubDate.day.minutes.poly.1"
## [6] "PubDate.day.minutes.poly.4"
## [7] "PubDate.hour.fctr(15.3,23]"
## [8] "NDSSName.my.fctrScience#Health#"
## [9] "PubDate.last4.log1p"
## [10] "PubDate.last2.log1p"
## [11] "NDSSName.my.fctrBusiness#Crosswords/Games#"
## [12] "NDSSName.my.fctrStyles#U.S.#"
## [13] "PubDate.last8.log1p"
## [14] "PubDate.day.minutes.poly.5"
## [15] "PubDate.wkend"
## [16] "NDSSName.my.fctr##:PubDate.day.minutes.poly.2.ctg"
## [17] "PubDate.last16.log1p"
## [18] "PubDate.juliandate"
## [19] "PubDate.month.fctr11"
## [20] "PubDate.day.minutes.poly.3"
## [21] "PubDate.wkday.fctr6"
## [22] "PubDate.date.fctr(7,13]"
## [23] "PubDate.second.fctr(14.8,29.5]"
## [24] "PubDate.wkday.fctr1"
## [25] "PubDate.month.fctr10"
## [26] ".rnorm"
## [27] "PubDate.last32.log1p"
## [28] "NDSSName.my.fctr##:PubDate.day.minutes.poly.4.ctg"
## [29] "PubDate.minute.fctr(44.2,59.1]"
## [30] "NDSSName.my.fctr##:PubDate.day.minutes.poly.5.ctg"
## [31] "NDSSName.my.fctr##:PubDate.day.minutes.poly.1.ctg"
## [32] "PubDate.day.minutes.poly.2"
## [33] "PubDate.hour.fctr(7.67,15.3]"
## [34] "PubDate.date.fctr(25,31]"
## [35] "PubDate.minute.fctr(14.8,29.5]"
## [36] "PubDate.second.fctr(44.2,59.1]"
## [37] "PubDate.wkday.fctr3"
## [38] "NDSSName.my.fctr##:PubDate.day.minutes.poly.3.ctg"
## [39] "NDSSName.my.fctrmyOther"
## [40] "NDSSName.my.fctr#Opinion#RoomForDebate"
## [41] "PubDate.date.fctr(19,25]"
## [42] "NDSSName.my.fctrBusiness#Technology#"
## [43] "PubDate.wkday.fctr4"
## [44] "PubDate.second.fctr(29.5,44.2]"
## [45] "PubDate.date.fctr(13,19]"
## [46] "NDSSName.my.fctrMetro#N.Y./Region#"
## [47] "NDSSName.my.fctrTravel#Travel#"
## [48] "NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness"
## [49] "NDSSName.my.fctr#Multimedia#"
## [50] "PubDate.wkday.fctr2"
## [51] "NDSSName.my.fctrStyles##Fashion"
## [52] "NDSSName.my.fctrForeign#World#"
## [53] "PubDate.minute.fctr(29.5,44.2]"
## [54] "NDSSName.my.fctrForeign#World#AsiaPacific"
## [55] "PubDate.wkday.fctr5"
## [56] "NDSSName.my.fctr#U.S.#Education"
## [57] "NDSSName.my.fctrCulture#Arts#"
## [58] "NDSSName.my.fctrBusiness#BusinessDay#Dealbook"
## [59] "NDSSName.my.fctr##"
## [60] "NDSSName.my.fctrTStyle##"
## [1] "numeric data missing in : "
## Popular Popular.fctr
## 1870 1870
## [1] "numeric data w/ 0s in : "
## WordCount Popular WordCount.log1p
## 109 5439 109
## WordCount.root2 WordCount.nexp PubDate.wkday.fctr
## 109 2044 378
## PubDate.wkend PubDate.hlday PubDate.day.minutes
## 7787 8160 5
## PubDate.last2.log1p PubDate.last4.log1p PubDate.last8.log1p
## 2 4 8
## PubDate.last16.log1p PubDate.last32.log1p PubDate.last2.log1p.ctg
## 16 32 42
## PubDate.last4.log1p.ctg PubDate.last8.log1p.ctg PubDate.last16.log1p.ctg
## 84 168 336
## PubDate.last32.log1p.ctg
## 670
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## NewsDesk SectionName SubsectionName Headline Snippet
## 2408 2899 6176 0 13
## Abstract PubDate NDSSName.my .lcn
## 17 0 0 1870
## [1] "glb_feats_df:"
## [1] 42 12
## id exclude.as.feat rsp_var
## Popular.fctr Popular.fctr TRUE TRUE
## id cor.y exclude.as.feat cor.y.abs cor.high.X
## Popular Popular 1.00000000 TRUE 1.00000000 <NA>
## UniqueID UniqueID 0.01182492 TRUE 0.01182492 <NA>
## Popular.fctr Popular.fctr NA TRUE NA <NA>
## freqRatio percentUnique zeroVar nzv is.cor.y.abs.low
## Popular 4.976212 0.03061849 FALSE FALSE FALSE
## UniqueID 1.000000 100.00000000 FALSE FALSE FALSE
## Popular.fctr NA NA NA NA NA
## interaction.feat shapiro.test.p.value rsp_var_raw id_var
## Popular <NA> NA TRUE NA
## UniqueID <NA> NA FALSE TRUE
## Popular.fctr <NA> NA NA NA
## rsp_var
## Popular NA
## UniqueID NA
## Popular.fctr TRUE
## [1] "glb_feats_df vs. glbObsAll: "
## character(0)
## [1] "glbObsAll vs. glb_feats_df: "
## character(0)
## label step_major step_minor label_minor bgn end elapsed
## 9 select.features 5 0 0 22.267 47.064 24.797
## 10 fit.models 6 0 0 47.064 NA NA
6.0: fit models
# load(paste0(glb_out_pfx, "dsk.RData"))
# Build the one-sided formula that get_dsp_models_df() passes to orderBy() to
# rank the rows of glb_models_df.
#
# Reads the globals glbMdlMetricsEval, glb_models_df, glb_is_classification and
# glb_is_binomial. Only metrics that exist as columns of glb_models_df are used
# (e.g. min.aic.fit might not be available). A metric whose name contains "max"
# is prefixed with "-", every other metric with "+"; for binomial
# classification a trailing "- opt.prob.threshold.OOB" term is appended.
#
# Returns: a formula such as `~ -max.Accuracy.OOB + min.aic.fit`.
# Raises: an error when there is no term to build the formula from (previously
#   this surfaced as an opaque parse error from as.formula("~ ")).
get_model_sel_frmla <- function() {
  # min.aic.fit might not be avl
  lclMdlEvlCriteria <-
    glbMdlMetricsEval[glbMdlMetricsEval %in% names(glb_models_df)]

  # One (sign, metric) pair per usable metric, flattened in metric order
  model_evl_terms <- unlist(lapply(lclMdlEvlCriteria, function(metric) {
    c(if (grepl("max", metric, fixed = TRUE)) "-" else "+", metric)
  }), use.names = FALSE)

  if (glb_is_classification && glb_is_binomial) {
    model_evl_terms <- c(model_evl_terms, "-", "opt.prob.threshold.OOB")
  }

  if (length(model_evl_terms) == 0) {
    stop("get_model_sel_frmla: none of glbMdlMetricsEval is a column of ",
         "glb_models_df; cannot build a model selection formula")
  }

  as.formula(paste(c("~ ", model_evl_terms), collapse = " "))
}
# Return the model comparison table and, as a side effect, write the
# per-model tuning plots to "<glb_out_pfx>bestTune.png".
#
# Reads the globals glbMdlMetricsEval, glb_models_df, glb_models_lst and
# glb_out_pfx.
#
# Returns: glb_models_df ordered by get_model_sel_frmla() and restricted to
#   "id", the evaluation metrics present in glb_models_df, and every column
#   whose name contains "opt.".
# Side effects: prints and warns about models whose `results` table has no
#   more rows than the model has tuning parameters; those models, and models
#   with zero tuning parameters, are left out of the plot grid.
get_dsp_models_df <- function() {
  dsp_models_cols <- c("id",
    glbMdlMetricsEval[glbMdlMetricsEval %in% names(glb_models_df)],
    grep("opt.", names(glb_models_df), fixed = TRUE, value = TRUE))
  dsp_models_df <-
    #orderBy(get_model_sel_frmla(), glb_models_df)[, c("id", glbMdlMetricsEval)]
    orderBy(get_model_sel_frmla(), glb_models_df)[, dsp_models_cols]

  # Number of rows in each model's resampling results
  nCvMdl <- sapply(glb_models_lst, function(mdl) nrow(mdl$results))
  # Number of tuning parameters per model; "custom" methods count as 0.
  # NOTE(review): rows named "parameter" look like modelLookup's placeholder
  # for models without tuning parameters - confirm against caret docs.
  nParams <- sapply(glb_models_lst, function(mdl) {
    if (mdl$method == "custom") {
      0
    } else {
      nrow(subset(modelLookup(mdl$method), parameter != "parameter"))
    }
  })

  #    nCvMdl <- nCvMdl[names(nCvMdl) != "avNNet"]
  #    nParams <- nParams[names(nParams) != "avNNet"]

  if (length(cvMdlProblems <- nCvMdl[nCvMdl <= nParams]) > 0) {
    print("Cross Validation issues:")
    warning("Cross Validation issues:")
    print(cvMdlProblems)
  }

  pltMdls <- setdiff(names(nCvMdl), names(cvMdlProblems))
  pltMdls <- setdiff(pltMdls, names(nParams[nParams == 0]))

  # length(pltMdls) == 21
  png(paste0(glb_out_pfx, "bestTune.png"), width = 480 * 2, height = 480 * 4)
  # Close the png device even when one of the plots below fails, so an error
  # does not leave a dangling graphics device behind.
  on.exit(dev.off(), add = TRUE)

  grid.newpage()
  # Two plots per row
  pushViewport(viewport(layout = grid.layout(ceiling(length(pltMdls) / 2.0), 2)))
  pltIx <- 1
  for (mdlId in pltMdls) {
    print(ggplot(glb_models_lst[[mdlId]], highBestTune = TRUE) + labs(title = mdlId),
          vp = viewport(layout.pos.row = ceiling(pltIx / 2.0),
                        layout.pos.col = ((pltIx - 1) %% 2) + 1))
    pltIx <- pltIx + 1
  }

  return(dsp_models_df)
}
#get_dsp_models_df()
# Binomial classification: abort when the response column of the fit
# observations holds fewer than two distinct values.
if (glb_is_classification && glb_is_binomial) {
  if (length(unique(glbObsFit[, glb_rsp_var])) < 2) {
    stop("glbObsFit$", glb_rsp_var, ": contains less than 2 unique values: ",
         paste0(unique(glbObsFit[, glb_rsp_var]), collapse = ", "))
  }
}
# Pick (up to) the first two features in `~ -cor.y.abs` order among those that
# are not excluded as features, not near-zero-variance, not flagged as
# low-correlation, and have no cor.high.X entry.
max_cor_y_x_vars <- orderBy(~ -cor.y.abs,
  subset(glb_feats_df, (exclude.as.feat == 0) & !nzv & !is.cor.y.abs.low &
           is.na(cor.high.X)))[1:2, "id"]
# Indexing rows 1:2 yields NA when fewer than two features qualify; drop those
max_cor_y_x_vars <- max_cor_y_x_vars[!is.na(max_cor_y_x_vars)]

# A user-specified Baseline variable must not be out-correlated by the top
# candidate feature. `&&` (scalar, short-circuit) is used because this is an
# `if` condition, not a vector mask.
if (!is.null(glb_Baseline_mdl_var)) {
  if ((max_cor_y_x_vars[1] != glb_Baseline_mdl_var) &&
      (glb_feats_df[glb_feats_df$id == max_cor_y_x_vars[1], "cor.y.abs"] >
       glb_feats_df[glb_feats_df$id == glb_Baseline_mdl_var, "cor.y.abs"])) {
    stop(max_cor_y_x_vars[1], " has a higher correlation with ", glb_rsp_var,
         " than the Baseline var: ", glb_Baseline_mdl_var)
  }
}
# Model family label passed as `type` in the model specs below.
# Plain if/else: glb_is_regression is a scalar flag, so vectorized ifelse()
# is unnecessary here.
glb_model_type <- if (glb_is_regression) "regression" else "classification"

# Model specs
# Bare expression: echoes the names of the model spec fields.
c("id.prefix", "method", "type",
  # trainControl params
  "preProc.method", "cv.n.folds", "cv.n.repeats", "summary.fn",
  # train params
  "metric", "metric.maximize", "tune.df")
## [1] "id.prefix" "method" "type"
## [4] "preProc.method" "cv.n.folds" "cv.n.repeats"
## [7] "summary.fn" "metric" "metric.maximize"
## [10] "tune.df"
# Baseline
# Fitted only when a baseline variable is configured. The call uses the same
# mdl_specs_lst / indep_vars interface as the MFO, Random and Max.cor.Y fits
# in this section; the previous mdl_id / model_method / indep_vars_vctr
# arguments match the legacy signature that is commented out for the Random
# model.
# NOTE(review): myfit_mdl is defined outside this file - confirm that
# "mybaseln_classfr" is accepted as train.method with
# trainControl.method = "none".
if (!is.null(glb_Baseline_mdl_var)) {
  ret_lst <- myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
      id.prefix = "Baseline", type = glb_model_type, trainControl.method = "none",
      train.method = "mybaseln_classfr")),
    indep_vars = glb_Baseline_mdl_var, rsp_var = glb_rsp_var,
    fit_df = glbObsFit, OOB_df = glbObsOOB)
}
# Most Frequent Outcome "MFO" model: mean(y) for regression
#   - caret's nullModel is not used since model stats are not avl
#   - rpart cannot be used for multinomial classification since it predicts
#     non-MFO
# Fit without resampling on the single .rnorm feature.
ret_lst <- myfit_mdl(
  mdl_specs_lst = myinit_mdl_specs_lst(
    mdl_specs_lst = list(
      id.prefix = "MFO",
      type = glb_model_type,
      trainControl.method = "none",
      train.method = ifelse(glb_is_regression, "lm", "myMFO_classfr")
    )
  ),
  indep_vars = ".rnorm",
  rsp_var = glb_rsp_var,
  fit_df = glbObsFit,
  OOB_df = glbObsOOB
)
## [1] "fitting model: MFO###myMFO_classfr"
## [1] " indep_vars: .rnorm"
## Fitting parameter = none on full training set
## [1] "in MFO.Classifier$fit"
## [1] "unique.vals:"
## [1] N Y
## Levels: N Y
## [1] "unique.prob:"
## y
## N Y
## 0.820358 0.179642
## [1] "MFO.val:"
## [1] "N"
## Length Class Mode
## unique.vals 2 factor numeric
## unique.prob 2 -none- numeric
## MFO.val 1 -none- character
## x.names 1 -none- character
## xNames 1 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] "entr MFO.Classifier$predict"
## [1] "exit MFO.Classifier$predict"
## Loading required package: ROCR
## Loading required package: gplots
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
## [1] "in MFO.Classifier$prob"
## N Y
## 1 0.820358 0.179642
## 2 0.820358 0.179642
## 3 0.820358 0.179642
## 4 0.820358 0.179642
## 5 0.820358 0.179642
## 6 0.820358 0.179642
## Prediction
## Reference N Y
## N 0 3941
## Y 0 863
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.1796420 0.0000000 0.1688795 0.1907952 0.8203580
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
## [1] "entr MFO.Classifier$predict"
## [1] "exit MFO.Classifier$predict"
## [1] "in MFO.Classifier$prob"
## N Y
## 1 0.820358 0.179642
## 2 0.820358 0.179642
## 3 0.820358 0.179642
## 4 0.820358 0.179642
## 5 0.820358 0.179642
## 6 0.820358 0.179642
## Prediction
## Reference N Y
## N 0 1498
## Y 0 230
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.1331019 0.0000000 0.1174298 0.1500310 0.8668981
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
## id feats max.nTuningRuns min.elapsedtime.everything
## 1 MFO###myMFO_classfr .rnorm 0 0.292
## min.elapsedtime.final max.AUCpROC.fit max.Sens.fit max.Spec.fit
## 1 0.003 0.5 1 0
## max.AUCROCR.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0.1 0.3045703 0.179642
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.1688795 0.1907952 0
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5 1 0 0.5
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.2349336 0.1331019
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.1174298 0.150031 0
# "random" model - classification only; regression needs none since it would
# be the same as MFO. Fit without resampling on the single .rnorm feature.
#stop(here"); glb2Sav(); all.equal(glb_models_df, sav_models_df)
if (glb_is_classification) {
  ret_lst <- myfit_mdl(
    mdl_specs_lst = myinit_mdl_specs_lst(
      mdl_specs_lst = list(
        id.prefix = "Random",
        type = glb_model_type,
        trainControl.method = "none",
        train.method = "myrandom_classfr"
      )
    ),
    indep_vars = ".rnorm",
    rsp_var = glb_rsp_var,
    fit_df = glbObsFit,
    OOB_df = glbObsOOB
  )
}
## [1] "fitting model: Random###myrandom_classfr"
## [1] " indep_vars: .rnorm"
## Fitting parameter = none on full training set
## Length Class Mode
## unique.vals 2 factor numeric
## unique.prob 2 table numeric
## xNames 1 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] "in Random.Classifier$prob"
## Prediction
## Reference N Y
## N 0 3941
## Y 0 863
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.1796420 0.0000000 0.1688795 0.1907952 0.8203580
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
## [1] "in Random.Classifier$prob"
## Prediction
## Reference N Y
## N 0 1498
## Y 0 230
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.1331019 0.0000000 0.1174298 0.1500310 0.8668981
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
## id feats max.nTuningRuns
## 1 Random###myrandom_classfr .rnorm 0
## min.elapsedtime.everything min.elapsedtime.final max.AUCpROC.fit
## 1 0.302 0.002 0.4990604
## max.Sens.fit max.Spec.fit max.AUCROCR.fit opt.prob.threshold.fit
## 1 0.8312611 0.1668598 0.4972757 0.1
## max.f.score.fit max.Accuracy.fit max.AccuracyLower.fit
## 1 0.3045703 0.179642 0.1688795
## max.AccuracyUpper.fit max.Kappa.fit max.AUCpROC.OOB max.Sens.OOB
## 1 0.1907952 0 0.5125675 0.8077437
## max.Spec.OOB max.AUCROCR.OOB opt.prob.threshold.OOB max.f.score.OOB
## 1 0.2173913 0.4857956 0.1 0.2349336
## max.Accuracy.OOB max.AccuracyLower.OOB max.AccuracyUpper.OOB
## 1 0.1331019 0.1174298 0.150031
## max.Kappa.OOB
## 1 0
# ret_lst <- myfit_mdl(mdl_id = "Random", model_method = "myrandom_classfr",
# model_type = glb_model_type,
# indep_vars_vctr = ".rnorm",
# rsp_var = glb_rsp_var,
# fit_df = glbObsFit, OOB_df = glbObsOOB)
# Max.cor.Y
#   Check impact of cv: this first fit uses trainControl.method = "none" and
#   serves as the no-resampling reference for the rcv fits that follow.
#   rpart is not a good candidate since caret does not optimize cp (only
#   tuning parameter of rpart) well
ret_lst <- myfit_mdl(
  mdl_specs_lst = myinit_mdl_specs_lst(
    mdl_specs_lst = list(
      id.prefix = "Max.cor.Y.rcv.1X1",
      type = glb_model_type,
      trainControl.method = "none",
      train.method = "glmnet"
    )
  ),
  indep_vars = max_cor_y_x_vars,
  rsp_var = glb_rsp_var,
  fit_df = glbObsFit,
  OOB_df = glbObsOOB
)
## [1] "fitting model: Max.cor.Y.rcv.1X1###glmnet"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr"
## Loading required package: glmnet
## Loading required package: Matrix
## Loaded glmnet 2.0-2
## Fitting alpha = 0.1, lambda = 0.00434 on full training set
## Length Class Mode
## a0 100 -none- numeric
## beta 2100 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 21 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -4.57159198
## NDSSName.my.fctr#Multimedia#
## -1.22219085
## NDSSName.my.fctr#Opinion#RoomForDebate
## -3.46072453
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 4.06871185
## NDSSName.my.fctr#U.S.#Education
## -1.89443632
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook
## -0.22472818
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.95537118
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 4.55408513
## NDSSName.my.fctrBusiness#Technology#
## 0.77368538
## NDSSName.my.fctrCulture#Arts#
## -0.09465691
## NDSSName.my.fctrForeign#World#
## -1.45528874
## NDSSName.my.fctrForeign#World#AsiaPacific
## -1.60117505
## NDSSName.my.fctrMetro#N.Y./Region#
## 0.01563989
## NDSSName.my.fctrOpEd#Opinion#
## 4.51696382
## NDSSName.my.fctrScience#Health#
## 3.51595317
## NDSSName.my.fctrStyles##Fashion
## -1.85948925
## NDSSName.my.fctrStyles#U.S.#
## 3.27995325
## NDSSName.my.fctrTStyle##
## -1.54110404
## NDSSName.my.fctrTravel#Travel#
## -1.41940605
## NDSSName.my.fctrmyOther
## -1.90156922
## WordCount.root2
## 0.08434378
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -4.60394059
## NDSSName.my.fctr#Multimedia#
## -1.25163328
## NDSSName.my.fctr#Opinion#RoomForDebate
## -3.55521332
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 4.09217313
## NDSSName.my.fctr#U.S.#Education
## -1.96172971
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook
## -0.22495986
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.96836050
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 4.58120497
## NDSSName.my.fctrBusiness#Technology#
## 0.78504703
## NDSSName.my.fctrCulture#Arts#
## -0.09069661
## NDSSName.my.fctrForeign#World#
## -1.51061232
## NDSSName.my.fctrForeign#World#AsiaPacific
## -1.63313235
## NDSSName.my.fctrMetro#N.Y./Region#
## 0.02466697
## NDSSName.my.fctrOpEd#Opinion#
## 4.54361134
## NDSSName.my.fctrScience#Health#
## 3.53210055
## NDSSName.my.fctrStyles##Fashion
## -1.92188290
## NDSSName.my.fctrStyles#U.S.#
## 3.29488750
## NDSSName.my.fctrTStyle##
## -1.57788931
## NDSSName.my.fctrTravel#Travel#
## -1.47368131
## NDSSName.my.fctrmyOther
## -1.97357582
## WordCount.root2
## 0.08537319
## Prediction
## Reference N Y
## N 3796 145
## Y 177 686
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.329725e-01 7.692476e-01 9.255302e-01 9.398832e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 1.026390e-114 8.406670e-02
## Prediction
## Reference N Y
## N 1151 347
## Y 67 163
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.604167e-01 3.148374e-01 7.395703e-01 7.803749e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 8.593187e-43
## id feats
## 1 Max.cor.Y.rcv.1X1###glmnet WordCount.root2,NDSSName.my.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 0 1.01 0.273
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.8790544 0.9632073 0.7949015 0.9608594
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0.8099174 0.9329725
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9255302 0.9398832 0.7692476
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5962443 0.9098798 0.2826087 0.8116126
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.4405405 0.7604167
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7395703 0.7803749 0.3148374
# Fit glmnet on the max-correlation features under repeated CV for a grid of
# fold counts (3, 5, ... up to glb_rcv_n_folds + 2) and repeat counts
# (1, 3, ... up to glb_rcv_n_repeats + 2); each fit gets the id prefix
# "Max.cor.Y.rcv.<folds>X<repeats>".
# rcv_n_folds == 1 & rcv_n_repeats > 1 crashes
for (rcv_n_folds in seq(3, glb_rcv_n_folds + 2, 2)) {
  for (rcv_n_repeats in seq(1, glb_rcv_n_repeats + 2, 2)) {
    # Experiment specific code to avoid caret crash
    #         lcl_tune_models_df <- rbind(data.frame()
    #                             ,data.frame(method = "glmnet", parameter = "alpha",
    #                                         vals = "0.100 0.325 0.550 0.775 1.000")
    #                             ,data.frame(method = "glmnet", parameter = "lambda",
    #                                         vals = "9.342e-02")
    #                                     )
    ret_lst <- myfit_mdl(
      mdl_specs_lst = myinit_mdl_specs_lst(
        mdl_specs_lst = list(
          id.prefix = paste0("Max.cor.Y.rcv.", rcv_n_folds, "X", rcv_n_repeats),
          type = glb_model_type,
          # tune.df = lcl_tune_models_df,
          trainControl.method = "repeatedcv",
          trainControl.number = rcv_n_folds,
          trainControl.repeats = rcv_n_repeats,
          trainControl.classProbs = glb_is_classification,
          trainControl.summaryFunction = glbMdlMetricSummaryFn,
          train.method = "glmnet",
          train.metric = glbMdlMetricSummary,
          train.maximize = glbMdlMetricMaximize
        )
      ),
      indep_vars = max_cor_y_x_vars,
      rsp_var = glb_rsp_var,
      fit_df = glbObsFit,
      OOB_df = glbObsOOB
    )
  }
}
## [1] "fitting model: Max.cor.Y.rcv.3X1##rcv#glmnet"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.325, lambda = 0.0201 on full training set
## Length Class Mode
## a0 99 -none- numeric
## beta 2079 dgCMatrix S4
## df 99 -none- numeric
## dim 2 -none- numeric
## lambda 99 -none- numeric
## dev.ratio 99 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 21 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -3.89350373
## NDSSName.my.fctr#Multimedia#
## -0.01916344
## NDSSName.my.fctr#Opinion#RoomForDebate
## -1.18453357
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.21701058
## NDSSName.my.fctr#U.S.#Education
## -0.47679040
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.09891374
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.87281404
## NDSSName.my.fctrBusiness#Technology#
## 0.40965256
## NDSSName.my.fctrForeign#World#
## -0.05114617
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.47464340
## NDSSName.my.fctrOpEd#Opinion#
## 3.95214357
## NDSSName.my.fctrScience#Health#
## 3.14232408
## NDSSName.my.fctrStyles##Fashion
## -0.31867093
## NDSSName.my.fctrStyles#U.S.#
## 2.92610567
## NDSSName.my.fctrTStyle##
## -0.60538025
## WordCount.root2
## 0.05783392
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -3.95644632
## NDSSName.my.fctr#Multimedia#
## -0.07182859
## NDSSName.my.fctr#Opinion#RoomForDebate
## -1.30034382
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.29694236
## NDSSName.my.fctr#U.S.#Education
## -0.53415905
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.14259759
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.94231812
## NDSSName.my.fctrBusiness#Technology#
## 0.45657914
## NDSSName.my.fctrForeign#World#
## -0.10021084
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.53077048
## NDSSName.my.fctrOpEd#Opinion#
## 4.01324666
## NDSSName.my.fctrScience#Health#
## 3.18936803
## NDSSName.my.fctrStyles##Fashion
## -0.38069674
## NDSSName.my.fctrStyles#U.S.#
## 2.97176051
## NDSSName.my.fctrTStyle##
## -0.64837249
## WordCount.root2
## 0.05978318
## Prediction
## Reference N Y
## N 3796 145
## Y 177 686
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.329725e-01 7.692476e-01 9.255302e-01 9.398832e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 1.026390e-114 8.406670e-02
## Prediction
## Reference N Y
## N 1146 352
## Y 67 163
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.575231e-01 3.107477e-01 7.365992e-01 7.775689e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 9.066396e-44
## id feats
## 1 Max.cor.Y.rcv.3X1##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 2.476 0.271
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.8767919 0.964476 0.7891078 0.9582555
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.8099174 0.9335973
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9255302 0.9398832 0.7691678
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5962443 0.9098798 0.2826087 0.8067975
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.4375839 0.7575231
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7365992 0.7775689 0.3107477
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.007015493 0.02403706
## [1] "fitting model: Max.cor.Y.rcv.3X3##rcv#glmnet"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.325, lambda = 0.0201 on full training set
## Length Class Mode
## a0 99 -none- numeric
## beta 2079 dgCMatrix S4
## df 99 -none- numeric
## dim 2 -none- numeric
## lambda 99 -none- numeric
## dev.ratio 99 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 21 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -3.89350373
## NDSSName.my.fctr#Multimedia#
## -0.01916344
## NDSSName.my.fctr#Opinion#RoomForDebate
## -1.18453357
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.21701058
## NDSSName.my.fctr#U.S.#Education
## -0.47679040
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.09891374
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.87281404
## NDSSName.my.fctrBusiness#Technology#
## 0.40965256
## NDSSName.my.fctrForeign#World#
## -0.05114617
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.47464340
## NDSSName.my.fctrOpEd#Opinion#
## 3.95214357
## NDSSName.my.fctrScience#Health#
## 3.14232408
## NDSSName.my.fctrStyles##Fashion
## -0.31867093
## NDSSName.my.fctrStyles#U.S.#
## 2.92610567
## NDSSName.my.fctrTStyle##
## -0.60538025
## WordCount.root2
## 0.05783392
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -3.95644632
## NDSSName.my.fctr#Multimedia#
## -0.07182859
## NDSSName.my.fctr#Opinion#RoomForDebate
## -1.30034382
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.29694236
## NDSSName.my.fctr#U.S.#Education
## -0.53415905
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.14259759
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.94231812
## NDSSName.my.fctrBusiness#Technology#
## 0.45657914
## NDSSName.my.fctrForeign#World#
## -0.10021084
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.53077048
## NDSSName.my.fctrOpEd#Opinion#
## 4.01324666
## NDSSName.my.fctrScience#Health#
## 3.18936803
## NDSSName.my.fctrStyles##Fashion
## -0.38069674
## NDSSName.my.fctrStyles#U.S.#
## 2.97176051
## NDSSName.my.fctrTStyle##
## -0.64837249
## WordCount.root2
## 0.05978318
## Prediction
## Reference N Y
## N 3796 145
## Y 177 686
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.329725e-01 7.692476e-01 9.255302e-01 9.398832e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 1.026390e-114 8.406670e-02
## Prediction
## Reference N Y
## N 1146 352
## Y 67 163
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.575231e-01 3.107477e-01 7.365992e-01 7.775689e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 9.066396e-44
## id feats
## 1 Max.cor.Y.rcv.3X3##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 4.546 0.27
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.8767919 0.964476 0.7891078 0.9582555
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.8099174 0.9333193
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9255302 0.9398832 0.7690803
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5962443 0.9098798 0.2826087 0.8067975
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.4375839 0.7575231
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7365992 0.7775689 0.3107477
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.005178375 0.01754365
## [1] "fitting model: Max.cor.Y.rcv.3X5##rcv#glmnet"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.325, lambda = 0.0201 on full training set
## Length Class Mode
## a0 99 -none- numeric
## beta 2079 dgCMatrix S4
## df 99 -none- numeric
## dim 2 -none- numeric
## lambda 99 -none- numeric
## dev.ratio 99 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 21 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -3.89350373
## NDSSName.my.fctr#Multimedia#
## -0.01916344
## NDSSName.my.fctr#Opinion#RoomForDebate
## -1.18453357
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.21701058
## NDSSName.my.fctr#U.S.#Education
## -0.47679040
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.09891374
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.87281404
## NDSSName.my.fctrBusiness#Technology#
## 0.40965256
## NDSSName.my.fctrForeign#World#
## -0.05114617
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.47464340
## NDSSName.my.fctrOpEd#Opinion#
## 3.95214357
## NDSSName.my.fctrScience#Health#
## 3.14232408
## NDSSName.my.fctrStyles##Fashion
## -0.31867093
## NDSSName.my.fctrStyles#U.S.#
## 2.92610567
## NDSSName.my.fctrTStyle##
## -0.60538025
## WordCount.root2
## 0.05783392
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -3.95644632
## NDSSName.my.fctr#Multimedia#
## -0.07182859
## NDSSName.my.fctr#Opinion#RoomForDebate
## -1.30034382
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.29694236
## NDSSName.my.fctr#U.S.#Education
## -0.53415905
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.14259759
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.94231812
## NDSSName.my.fctrBusiness#Technology#
## 0.45657914
## NDSSName.my.fctrForeign#World#
## -0.10021084
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.53077048
## NDSSName.my.fctrOpEd#Opinion#
## 4.01324666
## NDSSName.my.fctrScience#Health#
## 3.18936803
## NDSSName.my.fctrStyles##Fashion
## -0.38069674
## NDSSName.my.fctrStyles#U.S.#
## 2.97176051
## NDSSName.my.fctrTStyle##
## -0.64837249
## WordCount.root2
## 0.05978318
## Prediction
## Reference N Y
## N 3796 145
## Y 177 686
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.329725e-01 7.692476e-01 9.255302e-01 9.398832e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 1.026390e-114 8.406670e-02
## Prediction
## Reference N Y
## N 1146 352
## Y 67 163
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.575231e-01 3.107477e-01 7.365992e-01 7.775689e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 9.066396e-44
## id feats
## 1 Max.cor.Y.rcv.3X5##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 6.409 0.272
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.8767919 0.964476 0.7891078 0.9582555
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.8099174 0.9332218
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9255302 0.9398832 0.7686375
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5962443 0.9098798 0.2826087 0.8067975
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.4375839 0.7575231
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7365992 0.7775689 0.3107477
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.005396525 0.01835474
## [1] "fitting model: Max.cor.Y.rcv.5X1##rcv#glmnet"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.1, lambda = 0.0201 on full training set
## Warning in myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst =
## list(id.prefix = paste0("Max.cor.Y.rcv.", : model's bestTune found at an
## extreme of tuneGrid for parameter: alpha
## Length Class Mode
## a0 100 -none- numeric
## beta 2100 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 21 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -3.81141260
## NDSSName.my.fctr#Multimedia#
## -0.68105584
## NDSSName.my.fctr#Opinion#RoomForDebate
## -1.92624537
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.40699589
## NDSSName.my.fctr#U.S.#Education
## -0.98291999
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook
## -0.22577146
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.64343834
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.82797332
## NDSSName.my.fctrBusiness#Technology#
## 0.45317927
## NDSSName.my.fctrCulture#Arts#
## -0.17187706
## NDSSName.my.fctrForeign#World#
## -0.72035867
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.99018968
## NDSSName.my.fctrOpEd#Opinion#
## 3.81891156
## NDSSName.my.fctrScience#Health#
## 3.05516080
## NDSSName.my.fctrStyles##Fashion
## -0.97651721
## NDSSName.my.fctrStyles#U.S.#
## 2.84779285
## NDSSName.my.fctrTStyle##
## -0.94109645
## NDSSName.my.fctrTravel#Travel#
## -0.68827560
## NDSSName.my.fctrmyOther
## -0.84423735
## WordCount.root2
## 0.06115867
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -3.87108412
## NDSSName.my.fctr#Multimedia#
## -0.71588942
## NDSSName.my.fctr#Opinion#RoomForDebate
## -2.02010163
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.46715540
## NDSSName.my.fctr#U.S.#Education
## -1.02957582
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook
## -0.22558850
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.66798026
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.89132347
## NDSSName.my.fctrBusiness#Technology#
## 0.48212450
## NDSSName.my.fctrCulture#Arts#
## -0.16733777
## NDSSName.my.fctrForeign#World#
## -0.75793881
## NDSSName.my.fctrForeign#World#AsiaPacific
## -1.03076807
## NDSSName.my.fctrOpEd#Opinion#
## 3.87908175
## NDSSName.my.fctrScience#Health#
## 3.09788786
## NDSSName.my.fctrStyles##Fashion
## -1.02481879
## NDSSName.my.fctrStyles#U.S.#
## 2.88826078
## NDSSName.my.fctrTStyle##
## -0.97585470
## NDSSName.my.fctrTravel#Travel#
## -0.72668427
## NDSSName.my.fctrmyOther
## -0.90347045
## WordCount.root2
## 0.06289698
## Prediction
## Reference N Y
## N 3800 141
## Y 179 684
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.333888e-01 7.700473e-01 9.259666e-01 9.402789e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 1.097051e-115 3.860591e-02
## Prediction
## Reference N Y
## N 1137 361
## Y 53 177
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.604167e-01 3.373693e-01 7.395703e-01 7.803749e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 1.935747e-51
## id feats
## 1 Max.cor.Y.rcv.5X1##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 3.311 0.267
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.8784031 0.9642223 0.792584 0.9607052
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0.8104265 0.9331818
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9259666 0.9402789 0.7689055
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5962443 0.9098798 0.2826087 0.8114863
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.4609375 0.7604167
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7395703 0.7803749 0.3373693
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.008837283 0.03133449
## [1] "fitting model: Max.cor.Y.rcv.5X3##rcv#glmnet"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.1, lambda = 0.0201 on full training set
## Warning in myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst =
## list(id.prefix = paste0("Max.cor.Y.rcv.", : model's bestTune found at an
## extreme of tuneGrid for parameter: alpha
## Length Class Mode
## a0 100 -none- numeric
## beta 2100 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 21 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -3.81141260
## NDSSName.my.fctr#Multimedia#
## -0.68105584
## NDSSName.my.fctr#Opinion#RoomForDebate
## -1.92624537
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.40699589
## NDSSName.my.fctr#U.S.#Education
## -0.98291999
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook
## -0.22577146
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.64343834
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.82797332
## NDSSName.my.fctrBusiness#Technology#
## 0.45317927
## NDSSName.my.fctrCulture#Arts#
## -0.17187706
## NDSSName.my.fctrForeign#World#
## -0.72035867
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.99018968
## NDSSName.my.fctrOpEd#Opinion#
## 3.81891156
## NDSSName.my.fctrScience#Health#
## 3.05516080
## NDSSName.my.fctrStyles##Fashion
## -0.97651721
## NDSSName.my.fctrStyles#U.S.#
## 2.84779285
## NDSSName.my.fctrTStyle##
## -0.94109645
## NDSSName.my.fctrTravel#Travel#
## -0.68827560
## NDSSName.my.fctrmyOther
## -0.84423735
## WordCount.root2
## 0.06115867
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -3.87108412
## NDSSName.my.fctr#Multimedia#
## -0.71588942
## NDSSName.my.fctr#Opinion#RoomForDebate
## -2.02010163
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.46715540
## NDSSName.my.fctr#U.S.#Education
## -1.02957582
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook
## -0.22558850
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.66798026
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.89132347
## NDSSName.my.fctrBusiness#Technology#
## 0.48212450
## NDSSName.my.fctrCulture#Arts#
## -0.16733777
## NDSSName.my.fctrForeign#World#
## -0.75793881
## NDSSName.my.fctrForeign#World#AsiaPacific
## -1.03076807
## NDSSName.my.fctrOpEd#Opinion#
## 3.87908175
## NDSSName.my.fctrScience#Health#
## 3.09788786
## NDSSName.my.fctrStyles##Fashion
## -1.02481879
## NDSSName.my.fctrStyles#U.S.#
## 2.88826078
## NDSSName.my.fctrTStyle##
## -0.97585470
## NDSSName.my.fctrTravel#Travel#
## -0.72668427
## NDSSName.my.fctrmyOther
## -0.90347045
## WordCount.root2
## 0.06289698
## Prediction
## Reference N Y
## N 3800 141
## Y 179 684
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.333888e-01 7.700473e-01 9.259666e-01 9.402789e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 1.097051e-115 3.860591e-02
## Prediction
## Reference N Y
## N 1137 361
## Y 53 177
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.604167e-01 3.373693e-01 7.395703e-01 7.803749e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 1.935747e-51
## id feats
## 1 Max.cor.Y.rcv.5X3##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 6.029 0.268
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.8784031 0.9642223 0.792584 0.9607052
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0.8104265 0.9333905
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9259666 0.9402789 0.7698577
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5962443 0.9098798 0.2826087 0.8114863
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.4609375 0.7604167
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7395703 0.7803749 0.3373693
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.006138477 0.02161286
## [1] "fitting model: Max.cor.Y.rcv.5X5##rcv#glmnet"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.1, lambda = 0.0201 on full training set
## Warning in myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst =
## list(id.prefix = paste0("Max.cor.Y.rcv.", : model's bestTune found at an
## extreme of tuneGrid for parameter: alpha
## Length Class Mode
## a0 100 -none- numeric
## beta 2100 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 21 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -3.81141260
## NDSSName.my.fctr#Multimedia#
## -0.68105584
## NDSSName.my.fctr#Opinion#RoomForDebate
## -1.92624537
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.40699589
## NDSSName.my.fctr#U.S.#Education
## -0.98291999
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook
## -0.22577146
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.64343834
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.82797332
## NDSSName.my.fctrBusiness#Technology#
## 0.45317927
## NDSSName.my.fctrCulture#Arts#
## -0.17187706
## NDSSName.my.fctrForeign#World#
## -0.72035867
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.99018968
## NDSSName.my.fctrOpEd#Opinion#
## 3.81891156
## NDSSName.my.fctrScience#Health#
## 3.05516080
## NDSSName.my.fctrStyles##Fashion
## -0.97651721
## NDSSName.my.fctrStyles#U.S.#
## 2.84779285
## NDSSName.my.fctrTStyle##
## -0.94109645
## NDSSName.my.fctrTravel#Travel#
## -0.68827560
## NDSSName.my.fctrmyOther
## -0.84423735
## WordCount.root2
## 0.06115867
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -3.87108412
## NDSSName.my.fctr#Multimedia#
## -0.71588942
## NDSSName.my.fctr#Opinion#RoomForDebate
## -2.02010163
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.46715540
## NDSSName.my.fctr#U.S.#Education
## -1.02957582
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook
## -0.22558850
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.66798026
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.89132347
## NDSSName.my.fctrBusiness#Technology#
## 0.48212450
## NDSSName.my.fctrCulture#Arts#
## -0.16733777
## NDSSName.my.fctrForeign#World#
## -0.75793881
## NDSSName.my.fctrForeign#World#AsiaPacific
## -1.03076807
## NDSSName.my.fctrOpEd#Opinion#
## 3.87908175
## NDSSName.my.fctrScience#Health#
## 3.09788786
## NDSSName.my.fctrStyles##Fashion
## -1.02481879
## NDSSName.my.fctrStyles#U.S.#
## 2.88826078
## NDSSName.my.fctrTStyle##
## -0.97585470
## NDSSName.my.fctrTravel#Travel#
## -0.72668427
## NDSSName.my.fctrmyOther
## -0.90347045
## WordCount.root2
## 0.06289698
## Prediction
## Reference N Y
## N 3800 141
## Y 179 684
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.333888e-01 7.700473e-01 9.259666e-01 9.402789e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 1.097051e-115 3.860591e-02
## Prediction
## Reference N Y
## N 1137 361
## Y 53 177
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.604167e-01 3.373693e-01 7.395703e-01 7.803749e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 1.935747e-51
## id feats
## 1 Max.cor.Y.rcv.5X5##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 9.05 0.27
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.8784031 0.9642223 0.792584 0.9607052
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0.8104265 0.9331816
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9259666 0.9402789 0.7691429
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5962443 0.9098798 0.2826087 0.8114863
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.4609375 0.7604167
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7395703 0.7803749 0.3373693
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.0062138 0.02210061
# Add parallel coordinates graph of glb_models_df[, glbMdlMetricsEval] to evaluate cv parameters
# Columns to plot: the model id, the number of tuning runs, whichever entries of
#   glbMdlMetricsEval exist as columns of glb_models_df, and every column whose
#   name contains the literal text "opt.".
# unique() keeps a column from being requested twice when it is reachable via
#   more than one of those sources; a repeated name would otherwise be selected
#   twice by `[` and appear as a duplicated, auto-renamed column in the plot data.
tmp_models_cols <- unique(c("id", "max.nTuningRuns",
    glbMdlMetricsEval[glbMdlMetricsEval %in% names(glb_models_df)],
    grep("opt.", names(glb_models_df), fixed = TRUE, value = TRUE)))
# Keep only the rows whose id contains "Max.cor.Y.rcv." and drop the feats
#   column before picking the plot columns.
print(myplot_parcoord(obs_df = subset(glb_models_df,
                                      grepl("Max.cor.Y.rcv.", id, fixed = TRUE),
                                      select = -feats)[, tmp_models_cols],
                      id_var = "id"))
# Fit one rpart model on max_cor_y_x_vars with the cp tuning grid collapsed to
# the single value 0 (min == max == 0) and trainControl.method set to "none".
# The fit uses glbObsFit and glbObsOOB is supplied as the OOB data; the result
# is stored in ret_lst.
ret_lst <- myfit_mdl(
    mdl_specs_lst = myinit_mdl_specs_lst(
        mdl_specs_lst = list(
            id.prefix = "Max.cor.Y.rcv.1X1.cp.0",
            type = glb_model_type,
            trainControl.method = "none",
            train.method = "rpart",
            tune.df = data.frame(method = "rpart", parameter = "cp",
                                 min = 0.0, max = 0.0, by = 0.1)
        )
    ),
    indep_vars = max_cor_y_x_vars,
    rsp_var = glb_rsp_var,
    fit_df = glbObsFit,
    OOB_df = glbObsOOB
)
## [1] "fitting model: Max.cor.Y.rcv.1X1.cp.0###rpart"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr"
## Loading required package: rpart
## Fitting cp = 0 on full training set
## Loading required package: rpart.plot
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 4804
##
## CP nsplit rel error
## 1 0.3696407879 0 1.0000000
## 2 0.0984936269 1 0.6303592
## 3 0.0857473928 2 0.5318656
## 4 0.0567786790 3 0.4461182
## 5 0.0104287370 4 0.3893395
## 6 0.0057937428 5 0.3789108
## 7 0.0034762457 7 0.3673233
## 8 0.0023174971 8 0.3638470
## 9 0.0011587486 11 0.3568946
## 10 0.0007724990 13 0.3545771
## 11 0.0005793743 16 0.3522596
## 12 0.0004213631 24 0.3476246
## 13 0.0003862495 35 0.3429896
## 14 0.0000000000 41 0.3406721
##
## Variable importance
## NDSSName.my.fctrOpEd#Opinion#
## 48
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 14
## NDSSName.my.fctrScience#Health#
## 14
## NDSSName.my.fctrStyles#U.S.#
## 11
## WordCount.root2
## 9
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 2
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook
## 1
##
## Node number 1: 4804 observations, complexity param=0.3696408
## predicted class=N expected loss=0.179642 P(node) =1
## class counts: 3941 863
## probabilities: 0.820 0.180
## left son=2 (4367 obs) right son=3 (437 obs)
## Primary splits:
## NDSSName.my.fctrOpEd#Opinion# < 0.5 to the left, improve=451.59770, (0 missing)
## NDSSName.my.fctrBusiness#Crosswords/Games# < 0.5 to the left, improve=112.88510, (0 missing)
## WordCount.root2 < 25.75849 to the left, improve=111.17610, (0 missing)
## NDSSName.my.fctrScience#Health# < 0.5 to the left, improve= 99.35206, (0 missing)
## NDSSName.my.fctrStyles#U.S.# < 0.5 to the left, improve= 68.73272, (0 missing)
##
## Node number 2: 4367 observations, complexity param=0.09849363
## predicted class=N expected loss=0.1110602 P(node) =0.9090341
## class counts: 3882 485
## probabilities: 0.889 0.111
## left son=4 (4262 obs) right son=5 (105 obs)
## Primary splits:
## NDSSName.my.fctrBusiness#Crosswords/Games# < 0.5 to the left, improve=135.55130, (0 missing)
## NDSSName.my.fctrScience#Health# < 0.5 to the left, improve=125.07920, (0 missing)
## WordCount.root2 < 25.75849 to the left, improve= 94.70710, (0 missing)
## NDSSName.my.fctrStyles#U.S.# < 0.5 to the left, improve= 88.56821, (0 missing)
## NDSSName.my.fctr#Opinion#ThePublicEditor < 0.5 to the left, improve= 18.74400, (0 missing)
##
## Node number 3: 437 observations
## predicted class=Y expected loss=0.1350114 P(node) =0.09096586
## class counts: 59 378
## probabilities: 0.135 0.865
##
## Node number 4: 4262 observations, complexity param=0.08574739
## predicted class=N expected loss=0.09150634 P(node) =0.8871774
## class counts: 3872 390
## probabilities: 0.908 0.092
## left son=8 (4114 obs) right son=9 (148 obs)
## Primary splits:
## NDSSName.my.fctrScience#Health# < 0.5 to the left, improve=132.96710, (0 missing)
## NDSSName.my.fctrStyles#U.S.# < 0.5 to the left, improve= 94.69099, (0 missing)
## WordCount.root2 < 26.49528 to the left, improve= 84.07487, (0 missing)
## NDSSName.my.fctr#Opinion#ThePublicEditor < 0.5 to the left, improve= 19.71762, (0 missing)
## NDSSName.my.fctrTStyle## < 0.5 to the right, improve= 10.17000, (0 missing)
##
## Node number 5: 105 observations, complexity param=0.002317497
## predicted class=Y expected loss=0.0952381 P(node) =0.02185679
## class counts: 10 95
## probabilities: 0.095 0.905
## left son=10 (12 obs) right son=11 (93 obs)
## Primary splits:
## WordCount.root2 < 18.9043 to the left, improve=6.455453, (0 missing)
##
## Node number 8: 4114 observations, complexity param=0.05677868
## predicted class=N expected loss=0.06781721 P(node) =0.8563697
## class counts: 3835 279
## probabilities: 0.932 0.068
## left son=16 (3987 obs) right son=17 (127 obs)
## Primary splits:
## NDSSName.my.fctrStyles#U.S.# < 0.5 to the left, improve=102.410700, (0 missing)
## WordCount.root2 < 25.01 to the left, improve= 47.352210, (0 missing)
## NDSSName.my.fctr#Opinion#ThePublicEditor < 0.5 to the left, improve= 20.930810, (0 missing)
## NDSSName.my.fctrTStyle## < 0.5 to the right, improve= 5.249425, (0 missing)
## NDSSName.my.fctrBusiness#Technology# < 0.5 to the left, improve= 2.395935, (0 missing)
##
## Node number 9: 148 observations, complexity param=0.01042874
## predicted class=Y expected loss=0.25 P(node) =0.03080766
## class counts: 37 111
## probabilities: 0.250 0.750
## left son=18 (55 obs) right son=19 (93 obs)
## Primary splits:
## WordCount.root2 < 22.72663 to the left, improve=19.274, (0 missing)
##
## Node number 10: 12 observations
## predicted class=N expected loss=0.4166667 P(node) =0.002497918
## class counts: 7 5
## probabilities: 0.583 0.417
##
## Node number 11: 93 observations
## predicted class=Y expected loss=0.03225806 P(node) =0.01935887
## class counts: 3 90
## probabilities: 0.032 0.968
##
## Node number 16: 3987 observations, complexity param=0.005793743
## predicted class=N expected loss=0.04790569 P(node) =0.8299334
## class counts: 3796 191
## probabilities: 0.952 0.048
## left son=32 (2982 obs) right son=33 (1005 obs)
## Primary splits:
## WordCount.root2 < 25.01 to the left, improve=29.253580, (0 missing)
## NDSSName.my.fctr#Opinion#ThePublicEditor < 0.5 to the left, improve=21.978920, (0 missing)
## NDSSName.my.fctrBusiness#Technology# < 0.5 to the left, improve= 3.887348, (0 missing)
## NDSSName.my.fctrTStyle## < 0.5 to the right, improve= 2.348653, (0 missing)
## NDSSName.my.fctr#U.S.#Education < 0.5 to the right, improve= 1.187739, (0 missing)
## Surrogate splits:
## NDSSName.my.fctr#Opinion#RoomForDebate < 0.5 to the left, agree=0.758, adj=0.042, (0 split)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the left, agree=0.752, adj=0.016, (0 split)
## NDSSName.my.fctr#Opinion#ThePublicEditor < 0.5 to the left, agree=0.750, adj=0.008, (0 split)
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness < 0.5 to the left, agree=0.748, adj=0.002, (0 split)
##
## Node number 17: 127 observations, complexity param=0.003476246
## predicted class=Y expected loss=0.3070866 P(node) =0.0264363
## class counts: 39 88
## probabilities: 0.307 0.693
## left son=34 (13 obs) right son=35 (114 obs)
## Primary splits:
## WordCount.root2 < 15.32846 to the left, improve=2.753047, (0 missing)
##
## Node number 18: 55 observations, complexity param=0.002317497
## predicted class=N expected loss=0.4181818 P(node) =0.01144879
## class counts: 32 23
## probabilities: 0.582 0.418
## left son=36 (9 obs) right son=37 (46 obs)
## Primary splits:
## WordCount.root2 < 19.93708 to the right, improve=0.8264383, (0 missing)
##
## Node number 19: 93 observations
## predicted class=Y expected loss=0.05376344 P(node) =0.01935887
## class counts: 5 88
## probabilities: 0.054 0.946
##
## Node number 32: 2982 observations
## predicted class=N expected loss=0.01274313 P(node) =0.6207327
## class counts: 2944 38
## probabilities: 0.987 0.013
##
## Node number 33: 1005 observations, complexity param=0.005793743
## predicted class=N expected loss=0.1522388 P(node) =0.2092007
## class counts: 852 153
## probabilities: 0.848 0.152
## left son=66 (993 obs) right son=67 (12 obs)
## Primary splits:
## NDSSName.my.fctr#Opinion#ThePublicEditor < 0.5 to the left, improve=14.193880, (0 missing)
## NDSSName.my.fctrCulture#Arts# < 0.5 to the left, improve= 3.669601, (0 missing)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, improve= 3.556158, (0 missing)
## WordCount.root2 < 34.19795 to the left, improve= 2.582851, (0 missing)
## NDSSName.my.fctr#Opinion#RoomForDebate < 0.5 to the right, improve= 2.031748, (0 missing)
##
## Node number 34: 13 observations
## predicted class=N expected loss=0.3846154 P(node) =0.002706078
## class counts: 8 5
## probabilities: 0.615 0.385
##
## Node number 35: 114 observations, complexity param=0.000772499
## predicted class=Y expected loss=0.2719298 P(node) =0.02373022
## class counts: 31 83
## probabilities: 0.272 0.728
## left son=70 (79 obs) right son=71 (35 obs)
## Primary splits:
## WordCount.root2 < 29.21444 to the left, improve=1.020279, (0 missing)
##
## Node number 36: 9 observations
## predicted class=N expected loss=0.2222222 P(node) =0.001873439
## class counts: 7 2
## probabilities: 0.778 0.222
##
## Node number 37: 46 observations, complexity param=0.002317497
## predicted class=N expected loss=0.4565217 P(node) =0.009575354
## class counts: 25 21
## probabilities: 0.543 0.457
## left son=74 (36 obs) right son=75 (10 obs)
## Primary splits:
## WordCount.root2 < 17.01454 to the left, improve=1.514976, (0 missing)
##
## Node number 66: 993 observations, complexity param=0.0005793743
## predicted class=N expected loss=0.143001 P(node) =0.2067027
## class counts: 851 142
## probabilities: 0.857 0.143
## left son=132 (930 obs) right son=133 (63 obs)
## Primary splits:
## NDSSName.my.fctrCulture#Arts# < 0.5 to the left, improve=4.094729, (0 missing)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, improve=3.106316, (0 missing)
## WordCount.root2 < 29.5127 to the left, improve=2.722793, (0 missing)
## NDSSName.my.fctrBusiness#Technology# < 0.5 to the left, improve=1.962300, (0 missing)
## NDSSName.my.fctr#Opinion#RoomForDebate < 0.5 to the right, improve=1.793603, (0 missing)
##
## Node number 67: 12 observations
## predicted class=Y expected loss=0.08333333 P(node) =0.002497918
## class counts: 1 11
## probabilities: 0.083 0.917
##
## Node number 70: 79 observations, complexity param=0.000772499
## predicted class=Y expected loss=0.3164557 P(node) =0.01644463
## class counts: 25 54
## probabilities: 0.316 0.684
## left son=140 (25 obs) right son=141 (54 obs)
## Primary splits:
## WordCount.root2 < 27.36786 to the right, improve=0.5105485, (0 missing)
##
## Node number 71: 35 observations
## predicted class=Y expected loss=0.1714286 P(node) =0.007285595
## class counts: 6 29
## probabilities: 0.171 0.829
##
## Node number 74: 36 observations, complexity param=0.001158749
## predicted class=N expected loss=0.3888889 P(node) =0.007493755
## class counts: 22 14
## probabilities: 0.611 0.389
## left son=148 (8 obs) right son=149 (28 obs)
## Primary splits:
## WordCount.root2 < 15.74773 to the right, improve=0.3968254, (0 missing)
##
## Node number 75: 10 observations
## predicted class=Y expected loss=0.3 P(node) =0.002081599
## class counts: 3 7
## probabilities: 0.300 0.700
##
## Node number 132: 930 observations, complexity param=0.0005793743
## predicted class=N expected loss=0.1311828 P(node) =0.1935887
## class counts: 808 122
## probabilities: 0.869 0.131
## left son=264 (627 obs) right son=265 (303 obs)
## Primary splits:
## WordCount.root2 < 33.97057 to the left, improve=2.913816, (0 missing)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, improve=2.586923, (0 missing)
## NDSSName.my.fctrBusiness#Technology# < 0.5 to the left, improve=2.402029, (0 missing)
## NDSSName.my.fctr#Opinion#RoomForDebate < 0.5 to the right, improve=1.513920, (0 missing)
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook < 0.5 to the left, improve=1.276783, (0 missing)
## Surrogate splits:
## NDSSName.my.fctr#Opinion#RoomForDebate < 0.5 to the left, agree=0.719, adj=0.139, (0 split)
##
## Node number 133: 63 observations, complexity param=0.0003862495
## predicted class=N expected loss=0.3174603 P(node) =0.01311407
## class counts: 43 20
## probabilities: 0.683 0.317
## left son=266 (14 obs) right son=267 (49 obs)
## Primary splits:
## WordCount.root2 < 26.99984 to the left, improve=0.38322, (0 missing)
##
## Node number 140: 25 observations, complexity param=0.000772499
## predicted class=Y expected loss=0.4 P(node) =0.005203997
## class counts: 10 15
## probabilities: 0.400 0.600
## left son=280 (8 obs) right son=281 (17 obs)
## Primary splits:
## WordCount.root2 < 28.02674 to the left, improve=1.191176, (0 missing)
##
## Node number 141: 54 observations, complexity param=0.0003862495
## predicted class=Y expected loss=0.2777778 P(node) =0.01124063
## class counts: 15 39
## probabilities: 0.278 0.722
## left son=282 (45 obs) right son=283 (9 obs)
## Primary splits:
## WordCount.root2 < 26.55173 to the left, improve=0.6, (0 missing)
##
## Node number 148: 8 observations
## predicted class=N expected loss=0.25 P(node) =0.001665279
## class counts: 6 2
## probabilities: 0.750 0.250
##
## Node number 149: 28 observations, complexity param=0.001158749
## predicted class=N expected loss=0.4285714 P(node) =0.005828476
## class counts: 16 12
## probabilities: 0.571 0.429
## left son=298 (20 obs) right son=299 (8 obs)
## Primary splits:
## WordCount.root2 < 15.06648 to the left, improve=0.8642857, (0 missing)
##
## Node number 264: 627 observations, complexity param=0.0004213631
## predicted class=N expected loss=0.1036683 P(node) =0.1305162
## class counts: 562 65
## probabilities: 0.896 0.104
## left son=528 (561 obs) right son=529 (66 obs)
## Primary splits:
## NDSSName.my.fctrBusiness#Technology# < 0.5 to the left, improve=2.8404170, (0 missing)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, improve=1.0796950, (0 missing)
## NDSSName.my.fctrTStyle## < 0.5 to the right, improve=1.0670160, (0 missing)
## WordCount.root2 < 29.5127 to the left, improve=0.8966879, (0 missing)
## NDSSName.my.fctr#Multimedia# < 0.5 to the right, improve=0.4399337, (0 missing)
##
## Node number 265: 303 observations, complexity param=0.0005793743
## predicted class=N expected loss=0.1881188 P(node) =0.06307244
## class counts: 246 57
## probabilities: 0.812 0.188
## left son=530 (222 obs) right son=531 (81 obs)
## Primary splits:
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook < 0.5 to the left, improve=5.4890570, (0 missing)
## WordCount.root2 < 38.17067 to the right, improve=5.0156320, (0 missing)
## NDSSName.my.fctr#Opinion#RoomForDebate < 0.5 to the right, improve=3.4510070, (0 missing)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, improve=1.5155860, (0 missing)
## NDSSName.my.fctr#U.S.#Education < 0.5 to the right, improve=0.8078801, (0 missing)
## Surrogate splits:
## WordCount.root2 < 34.08078 to the right, agree=0.739, adj=0.025, (0 split)
##
## Node number 266: 14 observations
## predicted class=N expected loss=0.2142857 P(node) =0.002914238
## class counts: 11 3
## probabilities: 0.786 0.214
##
## Node number 267: 49 observations, complexity param=0.0003862495
## predicted class=N expected loss=0.3469388 P(node) =0.01019983
## class counts: 32 17
## probabilities: 0.653 0.347
## left son=534 (10 obs) right son=535 (39 obs)
## Primary splits:
## WordCount.root2 < 41.56249 to the right, improve=0.5425432, (0 missing)
##
## Node number 280: 8 observations
## predicted class=N expected loss=0.375 P(node) =0.001665279
## class counts: 5 3
## probabilities: 0.625 0.375
##
## Node number 281: 17 observations
## predicted class=Y expected loss=0.2941176 P(node) =0.003538718
## class counts: 5 12
## probabilities: 0.294 0.706
##
## Node number 282: 45 observations, complexity param=0.0003862495
## predicted class=Y expected loss=0.3111111 P(node) =0.009367194
## class counts: 14 31
## probabilities: 0.311 0.689
## left son=564 (23 obs) right son=565 (22 obs)
## Primary splits:
## WordCount.root2 < 21.70252 to the right, improve=0.6050944, (0 missing)
##
## Node number 283: 9 observations
## predicted class=Y expected loss=0.1111111 P(node) =0.001873439
## class counts: 1 8
## probabilities: 0.111 0.889
##
## Node number 298: 20 observations
## predicted class=N expected loss=0.35 P(node) =0.004163197
## class counts: 13 7
## probabilities: 0.650 0.350
##
## Node number 299: 8 observations
## predicted class=Y expected loss=0.375 P(node) =0.001665279
## class counts: 3 5
## probabilities: 0.375 0.625
##
## Node number 528: 561 observations, complexity param=0.0004213631
## predicted class=N expected loss=0.08734403 P(node) =0.1167777
## class counts: 512 49
## probabilities: 0.913 0.087
## left son=1056 (281 obs) right son=1057 (280 obs)
## Primary splits:
## WordCount.root2 < 29.33428 to the left, improve=1.5853030, (0 missing)
## NDSSName.my.fctrTStyle## < 0.5 to the right, improve=0.7645570, (0 missing)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, improve=0.7250433, (0 missing)
## NDSSName.my.fctrStyles##Fashion < 0.5 to the right, improve=0.3000638, (0 missing)
## NDSSName.my.fctr#Multimedia# < 0.5 to the right, improve=0.2729836, (0 missing)
## Surrogate splits:
## NDSSName.my.fctrMetro#N.Y./Region# < 0.5 to the left, agree=0.560, adj=0.118, (0 split)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, agree=0.533, adj=0.064, (0 split)
## NDSSName.my.fctr#Multimedia# < 0.5 to the right, agree=0.524, adj=0.046, (0 split)
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness < 0.5 to the left, agree=0.515, adj=0.029, (0 split)
## NDSSName.my.fctrStyles##Fashion < 0.5 to the right, agree=0.512, adj=0.021, (0 split)
##
## Node number 529: 66 observations, complexity param=0.0004213631
## predicted class=N expected loss=0.2424242 P(node) =0.01373855
## class counts: 50 16
## probabilities: 0.758 0.242
## left son=1058 (38 obs) right son=1059 (28 obs)
## Primary splits:
## WordCount.root2 < 27.86575 to the left, improve=0.6070859, (0 missing)
##
## Node number 530: 222 observations
## predicted class=N expected loss=0.1306306 P(node) =0.04621149
## class counts: 193 29
## probabilities: 0.869 0.131
##
## Node number 531: 81 observations, complexity param=0.0005793743
## predicted class=N expected loss=0.345679 P(node) =0.01686095
## class counts: 53 28
## probabilities: 0.654 0.346
## left son=1062 (15 obs) right son=1063 (66 obs)
## Primary splits:
## WordCount.root2 < 41.59766 to the right, improve=2.866218, (0 missing)
##
## Node number 534: 10 observations
## predicted class=N expected loss=0.2 P(node) =0.002081599
## class counts: 8 2
## probabilities: 0.800 0.200
##
## Node number 535: 39 observations, complexity param=0.0003862495
## predicted class=N expected loss=0.3846154 P(node) =0.008118235
## class counts: 24 15
## probabilities: 0.615 0.385
## left son=1070 (32 obs) right son=1071 (7 obs)
## Primary splits:
## WordCount.root2 < 34.23387 to the left, improve=0.595467, (0 missing)
##
## Node number 564: 23 observations, complexity param=0.0003862495
## predicted class=Y expected loss=0.3913043 P(node) =0.004787677
## class counts: 9 14
## probabilities: 0.391 0.609
## left son=1128 (7 obs) right son=1129 (16 obs)
## Primary splits:
## WordCount.root2 < 23.6326 to the left, improve=0.6529503, (0 missing)
##
## Node number 565: 22 observations
## predicted class=Y expected loss=0.2272727 P(node) =0.004579517
## class counts: 5 17
## probabilities: 0.227 0.773
##
## Node number 1056: 281 observations
## predicted class=N expected loss=0.04982206 P(node) =0.05849292
## class counts: 267 14
## probabilities: 0.950 0.050
##
## Node number 1057: 280 observations, complexity param=0.0004213631
## predicted class=N expected loss=0.125 P(node) =0.05828476
## class counts: 245 35
## probabilities: 0.875 0.125
## left son=2114 (71 obs) right son=2115 (209 obs)
## Primary splits:
## WordCount.root2 < 32.57299 to the right, improve=0.8968765, (0 missing)
## NDSSName.my.fctrTStyle## < 0.5 to the right, improve=0.7830739, (0 missing)
## NDSSName.my.fctrMetro#N.Y./Region# < 0.5 to the right, improve=0.3683673, (0 missing)
## NDSSName.my.fctr#Multimedia# < 0.5 to the right, improve=0.3578067, (0 missing)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, improve=0.3021494, (0 missing)
##
## Node number 1058: 38 observations
## predicted class=N expected loss=0.1842105 P(node) =0.007910075
## class counts: 31 7
## probabilities: 0.816 0.184
##
## Node number 1059: 28 observations, complexity param=0.0004213631
## predicted class=N expected loss=0.3214286 P(node) =0.005828476
## class counts: 19 9
## probabilities: 0.679 0.321
## left son=2118 (19 obs) right son=2119 (9 obs)
## Primary splits:
## WordCount.root2 < 28.6269 to the right, improve=1.454052, (0 missing)
##
## Node number 1062: 15 observations
## predicted class=N expected loss=0.06666667 P(node) =0.003122398
## class counts: 14 1
## probabilities: 0.933 0.067
##
## Node number 1063: 66 observations, complexity param=0.0005793743
## predicted class=N expected loss=0.4090909 P(node) =0.01373855
## class counts: 39 27
## probabilities: 0.591 0.409
## left son=2126 (25 obs) right son=2127 (41 obs)
## Primary splits:
## WordCount.root2 < 35.6581 to the left, improve=1.341286, (0 missing)
##
## Node number 1070: 32 observations
## predicted class=N expected loss=0.34375 P(node) =0.006661116
## class counts: 21 11
## probabilities: 0.656 0.344
##
## Node number 1071: 7 observations
## predicted class=Y expected loss=0.4285714 P(node) =0.001457119
## class counts: 3 4
## probabilities: 0.429 0.571
##
## Node number 1128: 7 observations
## predicted class=N expected loss=0.4285714 P(node) =0.001457119
## class counts: 4 3
## probabilities: 0.571 0.429
##
## Node number 1129: 16 observations
## predicted class=Y expected loss=0.3125 P(node) =0.003330558
## class counts: 5 11
## probabilities: 0.312 0.688
##
## Node number 2114: 71 observations
## predicted class=N expected loss=0.05633803 P(node) =0.01477935
## class counts: 67 4
## probabilities: 0.944 0.056
##
## Node number 2115: 209 observations, complexity param=0.0004213631
## predicted class=N expected loss=0.1483254 P(node) =0.04350541
## class counts: 178 31
## probabilities: 0.852 0.148
## left son=4230 (12 obs) right son=4231 (197 obs)
## Primary splits:
## NDSSName.my.fctrTStyle## < 0.5 to the right, improve=0.5601729, (0 missing)
## NDSSName.my.fctr#Multimedia# < 0.5 to the right, improve=0.5108985, (0 missing)
## WordCount.root2 < 30.09153 to the right, improve=0.4980706, (0 missing)
## NDSSName.my.fctrMetro#N.Y./Region# < 0.5 to the right, improve=0.4241343, (0 missing)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, improve=0.3390226, (0 missing)
##
## Node number 2118: 19 observations
## predicted class=N expected loss=0.2105263 P(node) =0.003955037
## class counts: 15 4
## probabilities: 0.789 0.211
##
## Node number 2119: 9 observations
## predicted class=Y expected loss=0.4444444 P(node) =0.001873439
## class counts: 4 5
## probabilities: 0.444 0.556
##
## Node number 2126: 25 observations
## predicted class=N expected loss=0.28 P(node) =0.005203997
## class counts: 18 7
## probabilities: 0.720 0.280
##
## Node number 2127: 41 observations, complexity param=0.0005793743
## predicted class=N expected loss=0.4878049 P(node) =0.008534555
## class counts: 21 20
## probabilities: 0.512 0.488
## left son=4254 (30 obs) right son=4255 (11 obs)
## Primary splits:
## WordCount.root2 < 36.31791 to the right, improve=0.6635625, (0 missing)
##
## Node number 4230: 12 observations
## predicted class=N expected loss=0 P(node) =0.002497918
## class counts: 12 0
## probabilities: 1.000 0.000
##
## Node number 4231: 197 observations, complexity param=0.0004213631
## predicted class=N expected loss=0.1573604 P(node) =0.04100749
## class counts: 166 31
## probabilities: 0.843 0.157
## left son=8462 (11 obs) right son=8463 (186 obs)
## Primary splits:
## NDSSName.my.fctr#Multimedia# < 0.5 to the right, improve=0.5769882, (0 missing)
## NDSSName.my.fctrMetro#N.Y./Region# < 0.5 to the right, improve=0.5314217, (0 missing)
## WordCount.root2 < 30.09153 to the right, improve=0.4682049, (0 missing)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, improve=0.4106319, (0 missing)
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness < 0.5 to the right, improve=0.1814254, (0 missing)
##
## Node number 4254: 30 observations, complexity param=0.0005793743
## predicted class=N expected loss=0.4333333 P(node) =0.006244796
## class counts: 17 13
## probabilities: 0.567 0.433
## left son=8508 (7 obs) right son=8509 (23 obs)
## Primary splits:
## WordCount.root2 < 37.14159 to the left, improve=0.3979296, (0 missing)
##
## Node number 4255: 11 observations
## predicted class=Y expected loss=0.3636364 P(node) =0.002289759
## class counts: 4 7
## probabilities: 0.364 0.636
##
## Node number 8462: 11 observations
## predicted class=N expected loss=0 P(node) =0.002289759
## class counts: 11 0
## probabilities: 1.000 0.000
##
## Node number 8463: 186 observations, complexity param=0.0004213631
## predicted class=N expected loss=0.1666667 P(node) =0.03871774
## class counts: 155 31
## probabilities: 0.833 0.167
## left son=16926 (29 obs) right son=16927 (157 obs)
## Primary splits:
## NDSSName.my.fctrMetro#N.Y./Region# < 0.5 to the right, improve=0.6559045, (0 missing)
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, improve=0.4920635, (0 missing)
## WordCount.root2 < 30.09153 to the right, improve=0.3890196, (0 missing)
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness < 0.5 to the right, improve=0.2415584, (0 missing)
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook < 0.5 to the left, improve=0.0126479, (0 missing)
##
## Node number 8508: 7 observations
## predicted class=N expected loss=0.2857143 P(node) =0.001457119
## class counts: 5 2
## probabilities: 0.714 0.286
##
## Node number 8509: 23 observations, complexity param=0.0005793743
## predicted class=N expected loss=0.4782609 P(node) =0.004787677
## class counts: 12 11
## probabilities: 0.522 0.478
## left son=17018 (8 obs) right son=17019 (15 obs)
## Primary splits:
## WordCount.root2 < 38.57459 to the right, improve=0.2615942, (0 missing)
##
## Node number 16926: 29 observations
## predicted class=N expected loss=0.06896552 P(node) =0.006036636
## class counts: 27 2
## probabilities: 0.931 0.069
##
## Node number 16927: 157 observations, complexity param=0.0004213631
## predicted class=N expected loss=0.1847134 P(node) =0.0326811
## class counts: 128 29
## probabilities: 0.815 0.185
## left son=33854 (18 obs) right son=33855 (139 obs)
## Primary splits:
## NDSSName.my.fctrForeign#World#AsiaPacific < 0.5 to the right, improve=0.67831090, (0 missing)
## WordCount.root2 < 32.38827 to the left, improve=0.61044970, (0 missing)
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness < 0.5 to the right, improve=0.38816480, (0 missing)
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook < 0.5 to the right, improve=0.01539613, (0 missing)
## Surrogate splits:
## WordCount.root2 < 32.51922 to the right, agree=0.892, adj=0.056, (0 split)
##
## Node number 17018: 8 observations
## predicted class=N expected loss=0.375 P(node) =0.001665279
## class counts: 5 3
## probabilities: 0.625 0.375
##
## Node number 17019: 15 observations
## predicted class=Y expected loss=0.4666667 P(node) =0.003122398
## class counts: 7 8
## probabilities: 0.467 0.533
##
## Node number 33854: 18 observations
## predicted class=N expected loss=0.05555556 P(node) =0.003746878
## class counts: 17 1
## probabilities: 0.944 0.056
##
## Node number 33855: 139 observations, complexity param=0.0004213631
## predicted class=N expected loss=0.2014388 P(node) =0.02893422
## class counts: 111 28
## probabilities: 0.799 0.201
## left son=67710 (102 obs) right son=67711 (37 obs)
## Primary splits:
## WordCount.root2 < 30.09153 to the right, improve=0.9266317, (0 missing)
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness < 0.5 to the right, improve=0.5580040, (0 missing)
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook < 0.5 to the right, improve=0.1306354, (0 missing)
##
## Node number 67710: 102 observations
## predicted class=N expected loss=0.1666667 P(node) =0.02123231
## class counts: 85 17
## probabilities: 0.833 0.167
##
## Node number 67711: 37 observations, complexity param=0.0004213631
## predicted class=N expected loss=0.2972973 P(node) =0.007701915
## class counts: 26 11
## probabilities: 0.703 0.297
## left son=135422 (30 obs) right son=135423 (7 obs)
## Primary splits:
## WordCount.root2 < 29.92488 to the left, improve=3.00231700, (0 missing)
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook < 0.5 to the left, improve=0.01303089, (0 missing)
##
## Node number 135422: 30 observations
## predicted class=N expected loss=0.2 P(node) =0.006244796
## class counts: 24 6
## probabilities: 0.800 0.200
##
## Node number 135423: 7 observations
## predicted class=Y expected loss=0.2857143 P(node) =0.001457119
## class counts: 2 5
## probabilities: 0.286 0.714
##
## n= 4804
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 4804 863 N (0.82035803 0.17964197)
## 2) NDSSName.my.fctrOpEd#Opinion#< 0.5 4367 485 N (0.88893978 0.11106022)
## 4) NDSSName.my.fctrBusiness#Crosswords/Games#< 0.5 4262 390 N (0.90849366 0.09150634)
## 8) NDSSName.my.fctrScience#Health#< 0.5 4114 279 N (0.93218279 0.06781721)
## 16) NDSSName.my.fctrStyles#U.S.#< 0.5 3987 191 N (0.95209431 0.04790569)
## 32) WordCount.root2< 25.01 2982 38 N (0.98725687 0.01274313) *
## 33) WordCount.root2>=25.01 1005 153 N (0.84776119 0.15223881)
## 66) NDSSName.my.fctr#Opinion#ThePublicEditor< 0.5 993 142 N (0.85699899 0.14300101)
## 132) NDSSName.my.fctrCulture#Arts#< 0.5 930 122 N (0.86881720 0.13118280)
## 264) WordCount.root2< 33.97057 627 65 N (0.89633174 0.10366826)
## 528) NDSSName.my.fctrBusiness#Technology#< 0.5 561 49 N (0.91265597 0.08734403)
## 1056) WordCount.root2< 29.33428 281 14 N (0.95017794 0.04982206) *
## 1057) WordCount.root2>=29.33428 280 35 N (0.87500000 0.12500000)
## 2114) WordCount.root2>=32.57299 71 4 N (0.94366197 0.05633803) *
## 2115) WordCount.root2< 32.57299 209 31 N (0.85167464 0.14832536)
## 4230) NDSSName.my.fctrTStyle##>=0.5 12 0 N (1.00000000 0.00000000) *
## 4231) NDSSName.my.fctrTStyle##< 0.5 197 31 N (0.84263959 0.15736041)
## 8462) NDSSName.my.fctr#Multimedia#>=0.5 11 0 N (1.00000000 0.00000000) *
## 8463) NDSSName.my.fctr#Multimedia#< 0.5 186 31 N (0.83333333 0.16666667)
## 16926) NDSSName.my.fctrMetro#N.Y./Region#>=0.5 29 2 N (0.93103448 0.06896552) *
## 16927) NDSSName.my.fctrMetro#N.Y./Region#< 0.5 157 29 N (0.81528662 0.18471338)
## 33854) NDSSName.my.fctrForeign#World#AsiaPacific>=0.5 18 1 N (0.94444444 0.05555556) *
## 33855) NDSSName.my.fctrForeign#World#AsiaPacific< 0.5 139 28 N (0.79856115 0.20143885)
## 67710) WordCount.root2>=30.09153 102 17 N (0.83333333 0.16666667) *
## 67711) WordCount.root2< 30.09153 37 11 N (0.70270270 0.29729730)
## 135422) WordCount.root2< 29.92488 30 6 N (0.80000000 0.20000000) *
## 135423) WordCount.root2>=29.92488 7 2 Y (0.28571429 0.71428571) *
## 529) NDSSName.my.fctrBusiness#Technology#>=0.5 66 16 N (0.75757576 0.24242424)
## 1058) WordCount.root2< 27.86575 38 7 N (0.81578947 0.18421053) *
## 1059) WordCount.root2>=27.86575 28 9 N (0.67857143 0.32142857)
## 2118) WordCount.root2>=28.6269 19 4 N (0.78947368 0.21052632) *
## 2119) WordCount.root2< 28.6269 9 4 Y (0.44444444 0.55555556) *
## 265) WordCount.root2>=33.97057 303 57 N (0.81188119 0.18811881)
## 530) NDSSName.my.fctrBusiness#BusinessDay#Dealbook< 0.5 222 29 N (0.86936937 0.13063063) *
## 531) NDSSName.my.fctrBusiness#BusinessDay#Dealbook>=0.5 81 28 N (0.65432099 0.34567901)
## 1062) WordCount.root2>=41.59766 15 1 N (0.93333333 0.06666667) *
## 1063) WordCount.root2< 41.59766 66 27 N (0.59090909 0.40909091)
## 2126) WordCount.root2< 35.6581 25 7 N (0.72000000 0.28000000) *
## 2127) WordCount.root2>=35.6581 41 20 N (0.51219512 0.48780488)
## 4254) WordCount.root2>=36.31791 30 13 N (0.56666667 0.43333333)
## 8508) WordCount.root2< 37.14159 7 2 N (0.71428571 0.28571429) *
## 8509) WordCount.root2>=37.14159 23 11 N (0.52173913 0.47826087)
## 17018) WordCount.root2>=38.57459 8 3 N (0.62500000 0.37500000) *
## 17019) WordCount.root2< 38.57459 15 7 Y (0.46666667 0.53333333) *
## 4255) WordCount.root2< 36.31791 11 4 Y (0.36363636 0.63636364) *
## 133) NDSSName.my.fctrCulture#Arts#>=0.5 63 20 N (0.68253968 0.31746032)
## 266) WordCount.root2< 26.99984 14 3 N (0.78571429 0.21428571) *
## 267) WordCount.root2>=26.99984 49 17 N (0.65306122 0.34693878)
## 534) WordCount.root2>=41.56249 10 2 N (0.80000000 0.20000000) *
## 535) WordCount.root2< 41.56249 39 15 N (0.61538462 0.38461538)
## 1070) WordCount.root2< 34.23387 32 11 N (0.65625000 0.34375000) *
## 1071) WordCount.root2>=34.23387 7 3 Y (0.42857143 0.57142857) *
## 67) NDSSName.my.fctr#Opinion#ThePublicEditor>=0.5 12 1 Y (0.08333333 0.91666667) *
## 17) NDSSName.my.fctrStyles#U.S.#>=0.5 127 39 Y (0.30708661 0.69291339)
## 34) WordCount.root2< 15.32846 13 5 N (0.61538462 0.38461538) *
## 35) WordCount.root2>=15.32846 114 31 Y (0.27192982 0.72807018)
## 70) WordCount.root2< 29.21444 79 25 Y (0.31645570 0.68354430)
## 140) WordCount.root2>=27.36786 25 10 Y (0.40000000 0.60000000)
## 280) WordCount.root2< 28.02674 8 3 N (0.62500000 0.37500000) *
## 281) WordCount.root2>=28.02674 17 5 Y (0.29411765 0.70588235) *
## 141) WordCount.root2< 27.36786 54 15 Y (0.27777778 0.72222222)
## 282) WordCount.root2< 26.55173 45 14 Y (0.31111111 0.68888889)
## 564) WordCount.root2>=21.70252 23 9 Y (0.39130435 0.60869565)
## 1128) WordCount.root2< 23.6326 7 3 N (0.57142857 0.42857143) *
## 1129) WordCount.root2>=23.6326 16 5 Y (0.31250000 0.68750000) *
## 565) WordCount.root2< 21.70252 22 5 Y (0.22727273 0.77272727) *
## 283) WordCount.root2>=26.55173 9 1 Y (0.11111111 0.88888889) *
## 71) WordCount.root2>=29.21444 35 6 Y (0.17142857 0.82857143) *
## 9) NDSSName.my.fctrScience#Health#>=0.5 148 37 Y (0.25000000 0.75000000)
## 18) WordCount.root2< 22.72663 55 23 N (0.58181818 0.41818182)
## 36) WordCount.root2>=19.93708 9 2 N (0.77777778 0.22222222) *
## 37) WordCount.root2< 19.93708 46 21 N (0.54347826 0.45652174)
## 74) WordCount.root2< 17.01454 36 14 N (0.61111111 0.38888889)
## 148) WordCount.root2>=15.74773 8 2 N (0.75000000 0.25000000) *
## 149) WordCount.root2< 15.74773 28 12 N (0.57142857 0.42857143)
## 298) WordCount.root2< 15.06648 20 7 N (0.65000000 0.35000000) *
## 299) WordCount.root2>=15.06648 8 3 Y (0.37500000 0.62500000) *
## 75) WordCount.root2>=17.01454 10 3 Y (0.30000000 0.70000000) *
## 19) WordCount.root2>=22.72663 93 5 Y (0.05376344 0.94623656) *
## 5) NDSSName.my.fctrBusiness#Crosswords/Games#>=0.5 105 10 Y (0.09523810 0.90476190)
## 10) WordCount.root2< 18.9043 12 5 N (0.58333333 0.41666667) *
## 11) WordCount.root2>=18.9043 93 3 Y (0.03225806 0.96774194) *
## 3) NDSSName.my.fctrOpEd#Opinion#>=0.5 437 59 Y (0.13501144 0.86498856) *
## Prediction
## Reference N Y
## N 3814 127
## Y 170 693
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.381765e-01 7.860827e-01 9.309917e-01 9.448229e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 2.798570e-127 1.480611e-02
## Prediction
## Reference N Y
## N 1180 318
## Y 84 146
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.673611e-01 2.953321e-01 7.467059e-01 7.871043e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 3.224022e-31
## id feats
## 1 Max.cor.Y.rcv.1X1.cp.0###rpart WordCount.root2,NDSSName.my.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 0 0.87 0.07
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.8821543 0.9705658 0.7937428 0.9504198
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.8235294 0.9381765
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9309917 0.9448229 0.7860827
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.6174697 0.9218959 0.3130435 0.7773858
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.4207493 0.7673611
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7467059 0.7871043 0.2953321
#stop("here"); glb2Sav(); all.equal(glb_models_df, sav_models_df)
# if (glb_is_regression || glb_is_binomial) # For multinomials this model will be run next by default
# Max.cor.Y: fit an rpart model on max_cor_y_x_vars using repeated
# cross-validation (glb_rcv_n_folds folds x glb_rcv_n_repeats repeats).
# Parallel resampling is switched off for this fit only.
# The value returned by myfit_mdl() is kept in ret_lst.
ret_lst <- myfit_mdl(
    mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
        id.prefix = "Max.cor.Y",
        type = glb_model_type,
        # resampling scheme
        trainControl.method = "repeatedcv",
        trainControl.number = glb_rcv_n_folds,
        trainControl.repeats = glb_rcv_n_repeats,
        trainControl.classProbs = glb_is_classification,
        trainControl.summaryFunction = glbMdlMetricSummaryFn,
        trainControl.allowParallel = FALSE,
        # tuning metric and its direction
        train.metric = glbMdlMetricSummary,
        train.maximize = glbMdlMetricMaximize,
        # learner
        train.method = "rpart"
    )),
    indep_vars = max_cor_y_x_vars,
    rsp_var = glb_rsp_var,
    fit_df = glbObsFit,
    OOB_df = glbObsOOB
)
## [1] "fitting model: Max.cor.Y##rcv#rpart"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr"
## + Fold1.Rep1: cp=0.01043
## - Fold1.Rep1: cp=0.01043
## + Fold2.Rep1: cp=0.01043
## - Fold2.Rep1: cp=0.01043
## + Fold3.Rep1: cp=0.01043
## - Fold3.Rep1: cp=0.01043
## + Fold1.Rep2: cp=0.01043
## - Fold1.Rep2: cp=0.01043
## + Fold2.Rep2: cp=0.01043
## - Fold2.Rep2: cp=0.01043
## + Fold3.Rep2: cp=0.01043
## - Fold3.Rep2: cp=0.01043
## + Fold1.Rep3: cp=0.01043
## - Fold1.Rep3: cp=0.01043
## + Fold2.Rep3: cp=0.01043
## - Fold2.Rep3: cp=0.01043
## + Fold3.Rep3: cp=0.01043
## - Fold3.Rep3: cp=0.01043
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.0104 on full training set
## Warning in myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst
## = list(id.prefix = "Max.cor.Y", : model's bestTune found at an extreme of
## tuneGrid for parameter: cp
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 4804
##
## CP nsplit rel error
## 1 0.36964079 0 1.0000000
## 2 0.09849363 1 0.6303592
## 3 0.08574739 2 0.5318656
## 4 0.05677868 3 0.4461182
## 5 0.01042874 4 0.3893395
##
## Variable importance
## NDSSName.my.fctrOpEd#Opinion#
## 55
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 16
## NDSSName.my.fctrScience#Health#
## 16
## NDSSName.my.fctrStyles#U.S.#
## 12
##
## Node number 1: 4804 observations, complexity param=0.3696408
## predicted class=N expected loss=0.179642 P(node) =1
## class counts: 3941 863
## probabilities: 0.820 0.180
## left son=2 (4367 obs) right son=3 (437 obs)
## Primary splits:
## NDSSName.my.fctrOpEd#Opinion# < 0.5 to the left, improve=451.59770, (0 missing)
## NDSSName.my.fctrBusiness#Crosswords/Games# < 0.5 to the left, improve=112.88510, (0 missing)
## WordCount.root2 < 25.75849 to the left, improve=111.17610, (0 missing)
## NDSSName.my.fctrScience#Health# < 0.5 to the left, improve= 99.35206, (0 missing)
## NDSSName.my.fctrStyles#U.S.# < 0.5 to the left, improve= 68.73272, (0 missing)
##
## Node number 2: 4367 observations, complexity param=0.09849363
## predicted class=N expected loss=0.1110602 P(node) =0.9090341
## class counts: 3882 485
## probabilities: 0.889 0.111
## left son=4 (4262 obs) right son=5 (105 obs)
## Primary splits:
## NDSSName.my.fctrBusiness#Crosswords/Games# < 0.5 to the left, improve=135.55130, (0 missing)
## NDSSName.my.fctrScience#Health# < 0.5 to the left, improve=125.07920, (0 missing)
## WordCount.root2 < 25.75849 to the left, improve= 94.70710, (0 missing)
## NDSSName.my.fctrStyles#U.S.# < 0.5 to the left, improve= 88.56821, (0 missing)
## NDSSName.my.fctr#Opinion#ThePublicEditor < 0.5 to the left, improve= 18.74400, (0 missing)
##
## Node number 3: 437 observations
## predicted class=Y expected loss=0.1350114 P(node) =0.09096586
## class counts: 59 378
## probabilities: 0.135 0.865
##
## Node number 4: 4262 observations, complexity param=0.08574739
## predicted class=N expected loss=0.09150634 P(node) =0.8871774
## class counts: 3872 390
## probabilities: 0.908 0.092
## left son=8 (4114 obs) right son=9 (148 obs)
## Primary splits:
## NDSSName.my.fctrScience#Health# < 0.5 to the left, improve=132.96710, (0 missing)
## NDSSName.my.fctrStyles#U.S.# < 0.5 to the left, improve= 94.69099, (0 missing)
## WordCount.root2 < 26.49528 to the left, improve= 84.07487, (0 missing)
## NDSSName.my.fctr#Opinion#ThePublicEditor < 0.5 to the left, improve= 19.71762, (0 missing)
## NDSSName.my.fctrTStyle## < 0.5 to the right, improve= 10.17000, (0 missing)
##
## Node number 5: 105 observations
## predicted class=Y expected loss=0.0952381 P(node) =0.02185679
## class counts: 10 95
## probabilities: 0.095 0.905
##
## Node number 8: 4114 observations, complexity param=0.05677868
## predicted class=N expected loss=0.06781721 P(node) =0.8563697
## class counts: 3835 279
## probabilities: 0.932 0.068
## left son=16 (3987 obs) right son=17 (127 obs)
## Primary splits:
## NDSSName.my.fctrStyles#U.S.# < 0.5 to the left, improve=102.410700, (0 missing)
## WordCount.root2 < 25.01 to the left, improve= 47.352210, (0 missing)
## NDSSName.my.fctr#Opinion#ThePublicEditor < 0.5 to the left, improve= 20.930810, (0 missing)
## NDSSName.my.fctrTStyle## < 0.5 to the right, improve= 5.249425, (0 missing)
## NDSSName.my.fctrBusiness#Technology# < 0.5 to the left, improve= 2.395935, (0 missing)
##
## Node number 9: 148 observations
## predicted class=Y expected loss=0.25 P(node) =0.03080766
## class counts: 37 111
## probabilities: 0.250 0.750
##
## Node number 16: 3987 observations
## predicted class=N expected loss=0.04790569 P(node) =0.8299334
## class counts: 3796 191
## probabilities: 0.952 0.048
##
## Node number 17: 127 observations
## predicted class=Y expected loss=0.3070866 P(node) =0.0264363
## class counts: 39 88
## probabilities: 0.307 0.693
##
## n= 4804
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 4804 863 N (0.82035803 0.17964197)
## 2) NDSSName.my.fctrOpEd#Opinion#< 0.5 4367 485 N (0.88893978 0.11106022)
## 4) NDSSName.my.fctrBusiness#Crosswords/Games#< 0.5 4262 390 N (0.90849366 0.09150634)
## 8) NDSSName.my.fctrScience#Health#< 0.5 4114 279 N (0.93218279 0.06781721)
## 16) NDSSName.my.fctrStyles#U.S.#< 0.5 3987 191 N (0.95209431 0.04790569) *
## 17) NDSSName.my.fctrStyles#U.S.#>=0.5 127 39 Y (0.30708661 0.69291339) *
## 9) NDSSName.my.fctrScience#Health#>=0.5 148 37 Y (0.25000000 0.75000000) *
## 5) NDSSName.my.fctrBusiness#Crosswords/Games#>=0.5 105 10 Y (0.09523810 0.90476190) *
## 3) NDSSName.my.fctrOpEd#Opinion#>=0.5 437 59 Y (0.13501144 0.86498856) *
## Prediction
## Reference N Y
## N 3796 145
## Y 191 672
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.300583e-01 7.576571e-01 9.224771e-01 9.371115e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 4.458834e-108 1.409037e-02
## Prediction
## Reference N Y
## N 1355 143
## Y 168 62
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.8200231 0.1825002 0.8010821 0.8378705 0.8668981
## AccuracyPValue McnemarPValue
## 1.0000000 0.1735405
## id feats max.nTuningRuns
## 1 Max.cor.Y##rcv#rpart WordCount.root2,NDSSName.my.fctr 5
## min.elapsedtime.everything min.elapsedtime.final max.AUCpROC.fit
## 1 2.935 0.073 0.8709432
## max.Sens.fit max.Spec.fit max.AUCROCR.fit opt.prob.threshold.fit
## 1 0.9632073 0.778679 0.8746354 0.6
## max.f.score.fit max.Accuracy.fit max.AccuracyLower.fit
## 1 0.8 0.9296422 0.9224771
## max.AccuracyUpper.fit max.Kappa.fit max.AUCpROC.OOB max.Sens.OOB
## 1 0.9371115 0.7515134 0.5870523 0.9045394
## max.Spec.OOB max.AUCROCR.OOB opt.prob.threshold.OOB max.f.score.OOB
## 1 0.2695652 0.5892132 0.6 0.2850575
## max.Accuracy.OOB max.AccuracyLower.OOB max.AccuracyUpper.OOB
## 1 0.8200231 0.8010821 0.8378705
## max.Kappa.OOB max.AccuracySD.fit max.KappaSD.fit
## 1 0.1825002 0.00506952 0.0191091
# Max.cor.Y.TmSrs.poly: when date-time features are configured and glbObsAll
# holds matching "<feat>.day.minutes.poly.<...>" columns, fit a glmnet model
# (repeated cross-validation) on max_cor_y_x_vars plus those columns.
#
# The per-feature patterns are OR-ed into ONE regular expression: grep() and
# grepl() use only the first element of a multi-element pattern vector (and
# warn), so un-collapsed patterns would ignore every date-time feature after
# the first. With a single feature the pattern is the same as before.
tmsrs_poly_feats <- character(0)
if (length(glbFeatsDateTime) > 0) {
    # Guard kept from the original: without any names the pattern would
    # degenerate to the bare ".day.minutes.poly." suffix.
    tmsrs_poly_feats <- grep(
        paste0(names(glbFeatsDateTime), "\\.day\\.minutes\\.poly\\.",
               collapse = "|"),
        names(glbObsAll), value = TRUE)
}
if (length(tmsrs_poly_feats) > 0) {
    ret_lst <- myfit_mdl(
        mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
            id.prefix = "Max.cor.Y.TmSrs.poly",
            type = glb_model_type,
            trainControl.method = "repeatedcv",
            trainControl.number = glb_rcv_n_folds,
            trainControl.repeats = glb_rcv_n_repeats,
            trainControl.classProbs = glb_is_classification,
            trainControl.summaryFunction = glbMdlMetricSummaryFn,
            train.metric = glbMdlMetricSummary,
            train.maximize = glbMdlMetricMaximize,
            train.method = "glmnet")),
        # matched columns were computed once above and are reused here
        indep_vars = c(max_cor_y_x_vars, tmsrs_poly_feats),
        rsp_var = glb_rsp_var,
        fit_df = glbObsFit, OOB_df = glbObsOOB)
}
## [1] "fitting model: Max.cor.Y.TmSrs.poly##rcv#glmnet"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.day.minutes.poly.2,PubDate.day.minutes.poly.3,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.5,PubDate.day.minutes.poly.1.ctg,PubDate.day.minutes.poly.2.ctg,PubDate.day.minutes.poly.3.ctg,PubDate.day.minutes.poly.4.ctg,PubDate.day.minutes.poly.5.ctg"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.55, lambda = 0.0201 on full training set
## Length Class Mode
## a0 95 -none- numeric
## beta 2945 dgCMatrix S4
## df 95 -none- numeric
## dim 2 -none- numeric
## lambda 95 -none- numeric
## dev.ratio 95 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 31 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -3.824918286
## NDSSName.my.fctr#Opinion#RoomForDebate
## -0.730902967
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 2.927432404
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.640472806
## NDSSName.my.fctrBusiness#Technology#
## 0.192915649
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.002462006
## NDSSName.my.fctrOpEd#Opinion#
## 3.940210915
## NDSSName.my.fctrScience#Health#
## 3.120520277
## NDSSName.my.fctrStyles#U.S.#
## 2.887354523
## NDSSName.my.fctrTStyle##
## -0.349210176
## PubDate.day.minutes.poly.1
## 10.171710001
## PubDate.day.minutes.poly.2
## 1.938052563
## PubDate.day.minutes.poly.4
## 0.422263612
## WordCount.root2
## 0.053515500
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -3.902999004
## NDSSName.my.fctr#Opinion#RoomForDebate
## -0.872538727
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 3.038605329
## NDSSName.my.fctr#U.S.#Education
## -0.004334849
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 3.700053147
## NDSSName.my.fctrBusiness#Technology#
## 0.274702193
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.061833760
## NDSSName.my.fctrOpEd#Opinion#
## 4.010945376
## NDSSName.my.fctrScience#Health#
## 3.177975989
## NDSSName.my.fctrStyles#U.S.#
## 2.950583177
## NDSSName.my.fctrTStyle##
## -0.393389148
## PubDate.day.minutes.poly.1
## 11.082450379
## PubDate.day.minutes.poly.2
## 2.849459283
## PubDate.day.minutes.poly.4
## 1.090937007
## WordCount.root2
## 0.055710821
## Prediction
## Reference N Y
## N 3796 145
## Y 177 686
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.329725e-01 7.692476e-01 9.255302e-01 9.398832e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 1.026390e-114 8.406670e-02
## Prediction
## Reference N Y
## N 1185 313
## Y 75 155
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.754630e-01 3.233542e-01 7.550404e-01 7.949457e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 2.416777e-33
## id
## 1 Max.cor.Y.TmSrs.poly##rcv#glmnet
## feats
## 1 WordCount.root2,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.day.minutes.poly.2,PubDate.day.minutes.poly.3,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.5,PubDate.day.minutes.poly.1.ctg,PubDate.day.minutes.poly.2.ctg,PubDate.day.minutes.poly.3.ctg,PubDate.day.minutes.poly.4.ctg,PubDate.day.minutes.poly.5.ctg
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 5.181 0.353
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.874855 0.9652372 0.7844728 0.9565422
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.8099174 0.9323484
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9255302 0.9398832 0.7646741
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.596578 0.9105474 0.2826087 0.8049472
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.4441261 0.775463
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7550404 0.7949457 0.3233542
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.005312169 0.01855071
# Interact.High.cor.Y: fit a glmnet model (repeated cross-validation) on
# max_cor_y_x_vars plus the interaction of max_cor_y_x_vars[1] with each
# feature named in glb_feats_df$cor.high.X.
#
# Candidate interaction partners are the distinct non-NA cor.high.X values,
# minus features flagged nzv, minus max_cor_y_x_vars[1] itself. Dropping the
# last avoids the self-interaction "x:x", which in an R formula is just "x"
# again and only clutters the recorded feature list.
int_feats <- setdiff(unique(glb_feats_df$cor.high.X), NA)
int_feats <- setdiff(int_feats, subset(glb_feats_df, nzv)$id)
int_feats <- setdiff(int_feats, max_cor_y_x_vars[1])
if (length(int_feats) > 0) {
    ret_lst <- myfit_mdl(
        mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
            id.prefix = "Interact.High.cor.Y",
            type = glb_model_type,
            trainControl.method = "repeatedcv",
            trainControl.number = glb_rcv_n_folds,
            trainControl.repeats = glb_rcv_n_repeats,
            trainControl.classProbs = glb_is_classification,
            trainControl.summaryFunction = glbMdlMetricSummaryFn,
            train.metric = glbMdlMetricSummary,
            train.maximize = glbMdlMetricMaximize,
            train.method = "glmnet")),
        # "<first max-cor feature>:<partner>" for every remaining partner
        indep_vars = c(max_cor_y_x_vars,
                       paste(max_cor_y_x_vars[1], int_feats, sep = ":")),
        rsp_var = glb_rsp_var,
        fit_df = glbObsFit, OOB_df = glbObsOOB)
}
## [1] "fitting model: Interact.High.cor.Y##rcv#glmnet"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr,WordCount.root2:WordCount.root2,WordCount.root2:PubDate.day.minutes.poly.1,WordCount.root2:PubDate.last4.log1p,WordCount.root2:PubDate.month.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.775, lambda = 0.000934 on full training set
## Length Class Mode
## a0 92 -none- numeric
## beta 2392 dgCMatrix S4
## df 92 -none- numeric
## dim 2 -none- numeric
## lambda 92 -none- numeric
## dev.ratio 92 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 26 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -4.8684452161
## NDSSName.my.fctr#Multimedia#
## -1.0487925341
## NDSSName.my.fctr#Opinion#RoomForDebate
## -5.4745572239
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 4.2885428864
## NDSSName.my.fctr#U.S.#Education
## -2.8325574053
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook
## -0.3151450444
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.7572259115
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 4.3391439837
## NDSSName.my.fctrBusiness#Technology#
## 0.8045020529
## NDSSName.my.fctrCulture#Arts#
## -0.3229201625
## NDSSName.my.fctrForeign#World#
## -1.8392467451
## NDSSName.my.fctrForeign#World#AsiaPacific
## -1.8077498032
## NDSSName.my.fctrMetro#N.Y./Region#
## 0.2627263003
## NDSSName.my.fctrOpEd#Opinion#
## 4.7415088342
## NDSSName.my.fctrScience#Health#
## 3.7483695286
## NDSSName.my.fctrStyles##Fashion
## -2.4194040890
## NDSSName.my.fctrStyles#U.S.#
## 3.4229733393
## NDSSName.my.fctrTStyle##
## -2.0211781889
## NDSSName.my.fctrTravel#Travel#
## -1.7751305343
## NDSSName.my.fctrmyOther
## -2.3074809466
## WordCount.root2
## 0.0361357822
## WordCount.root2:PubDate.day.minutes.poly.1
## 1.0813556024
## WordCount.root2:PubDate.last4.log1p
## 0.0069832077
## WordCount.root2:PubDate.month.fctr10
## 0.0039570640
## WordCount.root2:PubDate.month.fctr11
## -0.0001256918
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -4.8709915569
## NDSSName.my.fctr#Multimedia#
## -1.0849702043
## NDSSName.my.fctr#Opinion#RoomForDebate
## -5.5929746399
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 4.2928665989
## NDSSName.my.fctr#U.S.#Education
## -2.9379189459
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook
## -0.3276698231
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness
## -0.7772032984
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 4.3366629069
## NDSSName.my.fctrBusiness#Technology#
## 0.8004536454
## NDSSName.my.fctrCulture#Arts#
## -0.3372244058
## NDSSName.my.fctrForeign#World#
## -1.9323067428
## NDSSName.my.fctrForeign#World#AsiaPacific
## -1.8410299268
## NDSSName.my.fctrMetro#N.Y./Region#
## 0.2641549470
## NDSSName.my.fctrOpEd#Opinion#
## 4.7402435031
## NDSSName.my.fctrScience#Health#
## 3.7456718341
## NDSSName.my.fctrStyles##Fashion
## -2.5160313263
## NDSSName.my.fctrStyles#U.S.#
## 3.4196115772
## NDSSName.my.fctrTStyle##
## -2.0543357159
## NDSSName.my.fctrTravel#Travel#
## -1.8687591112
## NDSSName.my.fctrmyOther
## -2.4107173389
## WordCount.root2
## 0.0361773869
## WordCount.root2:PubDate.day.minutes.poly.1
## 1.0887365081
## WordCount.root2:PubDate.last4.log1p
## 0.0070269546
## WordCount.root2:PubDate.month.fctr10
## 0.0039706413
## WordCount.root2:PubDate.month.fctr11
## -0.0002339144
## Prediction
## Reference N Y
## N 3787 154
## Y 173 690
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.319317e-01 7.670549e-01 9.244394e-01 9.388938e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 2.593356e-112 3.195407e-01
## Prediction
## Reference N Y
## N 1164 334
## Y 71 159
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.656250e-01 3.156027e-01 7.449213e-01 7.854227e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 9.555643e-39
## id
## 1 Interact.High.cor.Y##rcv#glmnet
## feats
## 1 WordCount.root2,NDSSName.my.fctr,WordCount.root2:WordCount.root2,WordCount.root2:PubDate.day.minutes.poly.1,WordCount.root2:PubDate.last4.log1p,WordCount.root2:PubDate.month.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 5.038 0.322
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.8776419 0.9626998 0.792584 0.9625372
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.8084359 0.931585
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9244394 0.9388938 0.764104
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.6009259 0.9105474 0.2913043 0.8140971
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.439834 0.765625
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7449213 0.7854227 0.3156027
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.005250654 0.01810996
# Low.cor.X
#   Fits a glmnet model on the features of glb_feats_df for which cor.high.X is NA,
#   nzv is FALSE and exclude.as.feat is not 1.
#
# Disabled binomial-classification variant, kept for reference:
# if (glb_is_classification && glb_is_binomial)
#     indep_vars_vctr <- subset(glb_feats_df, is.na(cor.high.X) &
#                                             is.ConditionalX.y &
#                                             (exclude.as.feat != 1))[, "id"] else

# Experiment specific code to avoid caret crash:
#   explicit glmnet tuning grid with five alpha values and a single lambda value.
#   This object is referenced again further down when the All.X glmnet model is fitted.
glmnet_tune_models_df <- data.frame(
    method    = "glmnet",
    parameter = c("alpha", "lambda"),
    vals      = c("0.100 0.325 0.550 0.775 1.000", "9.342e-02"))

# Select the feature ids and pass them through myadjust_interaction_feats()
indep_vars <- myadjust_interaction_feats(
    subset(glb_feats_df,
           is.na(cor.high.X) & !nzv & (exclude.as.feat != 1))[, "id"])

# Repeated cross-validation fit on glbObsFit, evaluated against glbObsOOB
ret_lst <- myfit_mdl(
    mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
        id.prefix                    = "Low.cor.X",
        type                         = glb_model_type,
        tune.df                      = glmnet_tune_models_df,
        trainControl.method          = "repeatedcv",
        trainControl.number          = glb_rcv_n_folds,
        trainControl.repeats         = glb_rcv_n_repeats,
        trainControl.classProbs      = glb_is_classification,
        trainControl.summaryFunction = glbMdlMetricSummaryFn,
        train.metric                 = glbMdlMetricSummary,
        train.maximize               = glbMdlMetricMaximize,
        train.method                 = "glmnet")),
    indep_vars = indep_vars,
    rsp_var    = glb_rsp_var,
    fit_df     = glbObsFit,
    OOB_df     = glbObsOOB)
## [1] "fitting model: Low.cor.X##rcv#glmnet"
## [1] " indep_vars: WordCount.root2,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.wkend,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.2,PubDate.last4.log1p,PubDate.last16.log1p,PubDate.day.minutes.poly.3,PubDate.month.fctr,.rnorm,PubDate.last32.log1p,PubDate.date.fctr,PubDate.second.fctr,PubDate.minute.fctr,PubDate.wkday.fctr,WordCount.nexp,PubDate.day.minutes.poly.5,NDSSName.my.fctr:PubDate.last32.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.3.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.4.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.5.ctg,NDSSName.my.fctr:PubDate.last16.log1p.ctg,NDSSName.my.fctr:PubDate.last2.log1p.ctg,NDSSName.my.fctr:PubDate.last4.log1p.ctg,NDSSName.my.fctr:PubDate.last8.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.2.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.1.ctg"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.1, lambda = 0.0934 on full training set
## Warning in myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst
## = list(id.prefix = "Low.cor.X", : model's bestTune found at an extreme of
## tuneGrid for parameter: alpha
## Warning in myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst
## = list(id.prefix = "Low.cor.X", : model's bestTune found at an extreme of
## tuneGrid for parameter: lambda
## Length Class Mode
## a0 100 -none- numeric
## beta 26100 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 261 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -3.202862e+00
## NDSSName.my.fctr#Opinion#RoomForDebate
## -8.521086e-02
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 6.880206e-01
## NDSSName.my.fctr#U.S.#Education
## -5.784059e-02
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 6.701988e-01
## NDSSName.my.fctrBusiness#Technology#
## 2.290481e-02
## NDSSName.my.fctrForeign#World#AsiaPacific
## -3.274668e-02
## NDSSName.my.fctrOpEd#Opinion#
## 7.842207e-01
## NDSSName.my.fctrScience#Health#
## 8.362226e-01
## NDSSName.my.fctrStyles##Fashion
## -3.079107e-02
## NDSSName.my.fctrStyles#U.S.#
## 6.586231e-01
## NDSSName.my.fctrTStyle##
## -9.956225e-02
## PubDate.day.minutes.poly.1
## 6.452334e+00
## PubDate.day.minutes.poly.2
## 3.185151e+00
## PubDate.day.minutes.poly.4
## 8.173930e-01
## PubDate.last4.log1p
## 4.098268e-03
## PubDate.wkday.fctr5
## -2.275236e-05
## PubDate.wkend
## 1.052728e-01
## WordCount.root2
## 2.981460e-02
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg
## 1.144109e+00
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg
## -4.058241e+00
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg
## 1.435473e+00
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg
## 4.534495e+00
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg
## 4.741264e-01
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg
## 1.162763e-01
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg
## 1.806309e+00
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg
## -4.705401e-03
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg
## 4.304598e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg
## -3.317348e-03
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg
## 4.698895e-02
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg
## -2.205370e-03
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg
## 4.593643e-02
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg
## 3.155326e-02
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg
## -4.428299e-04
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg
## 2.398555e-02
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg
## -6.710680e-03
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg
## -7.172957e-03
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg
## 3.276090e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg
## -5.154022e-03
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg
## 6.583764e-02
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg
## 8.924455e-03
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg
## -3.157626e-03
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg
## 6.742134e-02
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg
## 5.375306e-02
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg
## -1.106633e-03
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg
## 5.997036e-02
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg
## -9.591957e-03
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg
## -4.820085e-03
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg
## 4.357947e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg
## -2.077560e-03
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg
## 4.049781e-02
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg
## 3.962558e-02
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg
## 3.048215e-02
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg
## 2.113852e-02
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg
## -5.645530e-03
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg
## -6.756004e-03
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg
## 3.427222e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg
## -4.871129e-03
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg
## 6.144420e-02
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg
## 5.221287e-03
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg
## -3.128905e-03
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg
## 5.939321e-02
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg
## 4.046602e-02
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg
## -9.613579e-04
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg
## 5.354044e-02
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg
## -8.318684e-03
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg
## -6.377549e-03
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg
## 4.012450e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg
## -4.056166e-03
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg
## 4.714690e-02
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg
## 3.372173e-03
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg
## -3.104981e-03
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg
## 5.275033e-02
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg
## 3.856981e-02
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg
## -1.044166e-03
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg
## 3.610233e-02
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg
## -7.703998e-03
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -3.2917372206
## NDSSName.my.fctr#Opinion#RoomForDebate
## -0.1077991963
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 0.7145162057
## NDSSName.my.fctr#U.S.#Education
## -0.0675153576
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 0.6847086604
## NDSSName.my.fctrBusiness#Technology#
## 0.0338425465
## NDSSName.my.fctrForeign#World#
## -0.0056510270
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.0431336996
## NDSSName.my.fctrOpEd#Opinion#
## 0.8080156260
## NDSSName.my.fctrScience#Health#
## 0.8720969876
## NDSSName.my.fctrStyles##Fashion
## -0.0449467556
## NDSSName.my.fctrStyles#U.S.#
## 0.6848458465
## NDSSName.my.fctrTStyle##
## -0.1068542341
## PubDate.day.minutes.poly.1
## 6.9218320529
## PubDate.day.minutes.poly.2
## 3.7125649527
## PubDate.day.minutes.poly.4
## 1.0946423451
## PubDate.last4.log1p
## 0.0074154519
## PubDate.wkday.fctr5
## -0.0099640217
## PubDate.wkend
## 0.1191625256
## WordCount.root2
## 0.0317225870
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg
## 1.4076395302
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg
## -5.5235755281
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg
## 1.6550615516
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg
## 5.3180890769
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg
## 0.7422606550
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg
## 0.4015516661
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.5.ctg
## 1.2519607267
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.5.ctg
## 0.0151914873
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg
## 2.7388433211
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg
## -0.0063971273
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg
## 0.0444497010
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg
## -0.0040421728
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg
## 0.0476287025
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg
## -0.0030456278
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg
## 0.0463442551
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg
## 0.0314477085
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg
## -0.0014930601
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg
## 0.0234751275
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg
## -0.0072231460
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg
## -0.0093789969
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg
## 0.0336400023
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg
## -0.0061272849
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg
## 0.0671838538
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg
## 0.0105210601
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg
## -0.0042188810
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg
## 0.0687814614
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg
## 0.0550257721
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg
## -0.0024258118
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg
## 0.0620565904
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg
## -0.0103290150
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg
## -0.0064579863
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg
## 0.0450789075
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg
## -0.0027461150
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg
## 0.0408247292
## NDSSName.my.fctrBusiness#Technology#:PubDate.last32.log1p.ctg
## 0.0003621433
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg
## -0.0005633660
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg
## 0.0397403693
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg
## 0.0304028028
## NDSSName.my.fctrStyles##Fashion:PubDate.last32.log1p.ctg
## -0.0001455953
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg
## 0.0205734403
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg
## -0.0060725179
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg
## -0.0087853392
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg
## 0.0351829015
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg
## -0.0057617515
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg
## 0.0627276287
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg
## 0.0064552363
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg
## -0.0041041644
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg
## 0.0603096891
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg
## 0.0407244990
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg
## -0.0021050459
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg
## 0.0551911420
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg
## -0.0089469436
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg
## -0.0082606820
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg
## 0.0412828578
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg
## -0.0048320742
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg
## 0.0475545601
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg
## 0.0044231483
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg
## -0.0040431528
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg
## 0.0534726687
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg
## 0.0389034575
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg
## -0.0021193073
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg
## 0.0364380868
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg
## -0.0082891522
## Prediction
## Reference N Y
## N 3787 154
## Y 174 689
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.317236e-01 7.662359e-01 9.242213e-01 9.386958e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 7.764099e-112 2.941323e-01
## Prediction
## Reference N Y
## N 1209 289
## Y 94 136
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.783565e-01 2.931798e-01 7.580195e-01 7.977437e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 3.657340e-23
## id
## 1 Low.cor.X##rcv#glmnet
## feats
## 1 WordCount.root2,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.wkend,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.2,PubDate.last4.log1p,PubDate.last16.log1p,PubDate.day.minutes.poly.3,PubDate.month.fctr,.rnorm,PubDate.last32.log1p,PubDate.date.fctr,PubDate.second.fctr,PubDate.minute.fctr,PubDate.wkday.fctr,WordCount.nexp,PubDate.day.minutes.poly.5,NDSSName.my.fctr:PubDate.last32.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.3.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.4.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.5.ctg,NDSSName.my.fctr:PubDate.last16.log1p.ctg,NDSSName.my.fctr:PubDate.last2.log1p.ctg,NDSSName.my.fctr:PubDate.last4.log1p.ctg,NDSSName.my.fctr:PubDate.last8.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.2.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.1.ctg
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 5 72.242 4.675
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.8624894 0.9659985 0.7589803 0.958864
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.2 0.8077374 0.9276303
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9242213 0.9386958 0.7453708
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5917252 0.9138852 0.2695652 0.8052766
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.4152672 0.7783565
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7580195 0.7977437 0.2931798
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.004942454 0.01832711
# The result of the last myfit_mdl() call is no longer needed in the workspace
rm(list = "ret_lst")
# Log the next "fit.models" step in the chunk timing table without bumping the major step
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models", major.inc = FALSE)
## label step_major step_minor label_minor bgn end elapsed
## 10 fit.models 6 0 0 47.064 246.207 199.143
## 11 fit.models 6 1 1 246.208 NA NA
# Start a separate chunk timing table (seeded with NULL) for the fit.models_1 section
fit.models_1_chunk_df <-
    myadd_chunk(NULL, "fit.models_1_bgn", label.minor = "setup")
## label step_major step_minor label_minor bgn end elapsed
## 1 fit.models_1_bgn 1 0 setup 258.668 NA NA
#stop(here"); glb2Sav(); all.equal(glb_models_df, sav_models_df)
# Fit one model for every (model family, method) pair:
#   - families are the names of glb_mdl_family_lst;
#   - methods come from glb_mdl_family_lst[[family]], or glbMdlMethods when that entry is NULL.
# For "*.Interact" families the feature set is derived (at most once) from the feature
# importance of a previously fitted glmnet model; all other families take their features
# from glb_mdl_feats_lst, the RFE results, or all eligible rows of glb_feats_df, in that order.
topindep_var <- NULL; interact_vars <- NULL;
for (mdl_id_pfx in names(glb_mdl_family_lst)) {
    fit.models_1_chunk_df <-
        myadd_chunk(fit.models_1_chunk_df, paste0("fit.models_1_", mdl_id_pfx),
                    major.inc = FALSE, label.minor = "setup")

    indep_vars <- NULL;

    if (grepl("\\.Interact", mdl_id_pfx)) {
        if (is.null(topindep_var) && is.null(interact_vars)) {
        #   select best glmnet model upto now
            # NOTE(review): model ids printed in this file look like "All.X##rcv#glmnet",
            #   which the fixed pattern ".glmnet" below would not match - confirm.
            dsp_models_df <- orderBy(model_sel_frmla <- get_model_sel_frmla(),
                                     glb_models_df)
            dsp_models_df <- subset(dsp_models_df,
                                    grepl(".glmnet", id, fixed = TRUE))
            bst_mdl_id <- dsp_models_df$id[1]
            # Re-label this family after the selected model: its id minus the last
            #   "."-separated token, plus "Interact"
            mdl_id_pfx <-
                paste(c(head(unlist(strsplit(bst_mdl_id, "[.]")), -1), "Interact"),
                      collapse=".")

        #   select important features
            if (is.null(bst_featsimp_df <-
                        myget_feats_importance(glb_models_lst[[bst_mdl_id]]))) {
                warning("Base model for RFE.Interact: ", bst_mdl_id,
                        " has no important features")
                next
            }

            # Walk down the importance table until a feature is found that is not listed
            #   in glbFeatsInteractionOnly; factor dummies are mapped back to their factor
            topindep_ix <- 1
            while (is.null(topindep_var) && (topindep_ix <= nrow(bst_featsimp_df))) {
                topindep_var <- row.names(bst_featsimp_df)[topindep_ix]
                if (grepl(".fctr", topindep_var, fixed=TRUE))
                    # fixed = TRUE keeps the split consistent with the literal ".fctr"
                    #   test above ("." would otherwise match any character)
                    topindep_var <-
                        paste0(unlist(strsplit(topindep_var, ".fctr",
                                               fixed = TRUE))[1], ".fctr")
                if (topindep_var %in% names(glbFeatsInteractionOnly)) {
                    topindep_var <- NULL; topindep_ix <- topindep_ix + 1
                } else break
            }

        #   select features with importance > max(10, importance of .rnorm) & is not highest
        #       combine factor dummy features to just the factor feature
            if (length(pos_rnorm <-
                       grep(".rnorm", row.names(bst_featsimp_df), fixed=TRUE)) > 0)
                imp_rnorm <- bst_featsimp_df[pos_rnorm, 1] else
                imp_rnorm <- NA
            imp_cutoff <- max(10, imp_rnorm, na.rm=TRUE)
            # tail(., -1) drops the first row that passes the cutoff
            interact_vars <-
                tail(row.names(subset(bst_featsimp_df,
                                      imp > imp_cutoff)), -1)
            if (length(interact_vars) > 0) {
                interact_vars <-
                    myadjust_interaction_feats(myextract_actual_feats(interact_vars))
                interact_vars <-
                    interact_vars[!grepl(topindep_var, interact_vars, fixed=TRUE)]
            }
            ### bid0_sp only
#             interact_vars <- c(
#     "biddable", "D.ratio.sum.TfIdf.wrds.n", "D.TfIdf.sum.stem.stop.Ratio", "D.sum.TfIdf",
#     "D.TfIdf.sum.post.stop", "D.TfIdf.sum.post.stem", "D.ratio.wrds.stop.n.wrds.n", "D.chrs.uppr.n.log",
#     "D.chrs.n.log", "color.fctr"
#     # , "condition.fctr", "prdl.my.descr.fctr"
#                                 )
#            interact_vars <- setdiff(interact_vars, c("startprice.dgt2.is9", "color.fctr"))
            ###
            # Final feature set: the remaining features plus
            #   "<topindep_var>*<interact_var>" terms for every interaction candidate
            indep_vars <- myextract_actual_feats(row.names(bst_featsimp_df))
            indep_vars <- setdiff(indep_vars, topindep_var)
            if (length(interact_vars) > 0) {
                indep_vars <-
                    setdiff(indep_vars, myextract_actual_feats(interact_vars))
                indep_vars <- c(indep_vars,
                                paste(topindep_var, setdiff(interact_vars, topindep_var),
                                      sep = "*"))
            } else indep_vars <- union(indep_vars, topindep_var)
        }
    }

    # Fallback chain for the feature set when none was derived above
    if (is.null(indep_vars))
        indep_vars <- glb_mdl_feats_lst[[mdl_id_pfx]]

    if (is.null(indep_vars) && grepl("RFE\\.", mdl_id_pfx))
        indep_vars <- myextract_actual_feats(predictors(rfe_fit_results))

    if (is.null(indep_vars))
        indep_vars <- subset(glb_feats_df, !nzv & (exclude.as.feat != 1))[, "id"]

    # A feature spec whose first element starts with "%<d-%" holds R code that is evaluated
    #   to produce the feature vector. Only the first element is inspected: a condition of
    #   length > 1 in if() is a warning in older R versions and an error since R 4.2.
    # NOTE(review): eval(parse()) runs arbitrary code from the spec - keep the spec trusted.
    if (isTRUE(grepl("^%<d-%", indep_vars[1]))) {
        #stop(here")
        indep_vars <-
            eval(parse(text = str_trim(unlist(strsplit(indep_vars[1], "%<d-%"))[2])))
    }

    indep_vars <- myadjust_interaction_feats(indep_vars)

    if (grepl("\\.Interact", mdl_id_pfx)) {
        # if (method != tail(unlist(strsplit(bst_mdl_id, "[.]")), 1)) next
        # A re-labelled Interact family inherits the methods of "Best.Interact", if defined
        if (is.null(glb_mdl_family_lst[[mdl_id_pfx]])) {
            if (!is.null(glb_mdl_family_lst[["Best.Interact"]]))
                glb_mdl_family_lst[[mdl_id_pfx]] <-
                    glb_mdl_family_lst[["Best.Interact"]]
        }
    }

    # Drop the observations listed as outliers for this family (matched on glb_id_var)
    if (!is.null(glbObsFitOutliers[[mdl_id_pfx]])) {
        fitobs_df <- glbObsFit[!(glbObsFit[, glb_id_var] %in%
                                     glbObsFitOutliers[[mdl_id_pfx]]), ]
    } else fitobs_df <- glbObsFit

    if (is.null(glb_mdl_family_lst[[mdl_id_pfx]]))
        mdl_methods <- glbMdlMethods else
        mdl_methods <- glb_mdl_family_lst[[mdl_id_pfx]]

    for (method in mdl_methods) {
        # Work on a per-method copy so that dropping .rnorm for rpart / rf does not
        #   leak into the methods fitted after them in the same family
        mthd_indep_vars <- indep_vars
        if (method %in% c("rpart", "rf")) {
            # rpart:  fubar's the tree
            # rf:     skip the scenario w/ .rnorm for speed
            mthd_indep_vars <- setdiff(mthd_indep_vars, c(".rnorm"))
            #mdl_id <- paste0(mdl_id_pfx, ".no.rnorm")
        }

        fit.models_1_chunk_df <- myadd_chunk(fit.models_1_chunk_df,
                            paste0("fit.models_1_", mdl_id_pfx), major.inc = FALSE,
                                             label.minor = method)

        ret_lst <-
            myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
                id.prefix = mdl_id_pfx,
                type = glb_model_type,
                # the All.X glmnet fit uses the dedicated glmnet tuning grid
                tune.df =
                    if ((mdl_id_pfx %in% "All.X") && (method %in% "glmnet")) glmnet_tune_models_df else
                        glb_tune_models_df,
                trainControl.method = "repeatedcv",
                trainControl.number = glb_rcv_n_folds,
                trainControl.repeats = glb_rcv_n_repeats,
                trainControl.classProbs = glb_is_classification,
                trainControl.summaryFunction = glbMdlMetricSummaryFn,
                #trainControl.allowParallel = FALSE,
                train.metric = glbMdlMetricSummary,
                train.maximize = glbMdlMetricMaximize,
                train.method = method)),
                indep_vars = mthd_indep_vars, rsp_var = glb_rsp_var,
                fit_df = fitobs_df, OOB_df = glbObsOOB)
    }
}
## label step_major step_minor label_minor bgn end
## 1 fit.models_1_bgn 1 0 setup 258.668 258.678
## 2 fit.models_1_All.X 1 1 setup 258.679 NA
## elapsed
## 1 0.01
## 2 NA
## Warning in if (grepl("^%<d-%", indep_vars)) {: the condition has length > 1
## and only the first element will be used
## label step_major step_minor label_minor bgn end
## 2 fit.models_1_All.X 1 1 setup 258.679 258.688
## 3 fit.models_1_All.X 1 2 glmnet 258.688 NA
## elapsed
## 2 0.009
## 3 NA
## [1] "fitting model: All.X##rcv#glmnet"
## [1] " indep_vars: WordCount.root2,WordCount.log1p,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.hour.fctr,PubDate.wkend,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.2,PubDate.last4.log1p,PubDate.last2.log1p,PubDate.last8.log1p,PubDate.last16.log1p,PubDate.day.minutes.poly.3,PubDate.month.fctr,PubDate.juliandate,.rnorm,PubDate.last32.log1p,PubDate.date.fctr,PubDate.second.fctr,PubDate.minute.fctr,PubDate.wkday.fctr,WordCount.nexp,PubDate.day.minutes.poly.5,NDSSName.my.fctr:PubDate.last32.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.3.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.4.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.5.ctg,NDSSName.my.fctr:PubDate.last16.log1p.ctg,NDSSName.my.fctr:PubDate.last2.log1p.ctg,NDSSName.my.fctr:PubDate.last4.log1p.ctg,NDSSName.my.fctr:PubDate.last8.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.2.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.1.ctg"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.1, lambda = 0.0934 on full training set
## Warning in myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst
## = list(id.prefix = mdl_id_pfx, : model's bestTune found at an extreme of
## tuneGrid for parameter: alpha
## Warning in myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst
## = list(id.prefix = mdl_id_pfx, : model's bestTune found at an extreme of
## tuneGrid for parameter: lambda
## Length Class Mode
## a0 100 -none- numeric
## beta 26700 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 267 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -3.9128975142
## NDSSName.my.fctr#Opinion#RoomForDebate
## -0.0993818427
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 0.6780570283
## NDSSName.my.fctr#U.S.#Education
## -0.0538608422
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 0.6629269084
## NDSSName.my.fctrBusiness#Technology#
## 0.0167402411
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.0403659374
## NDSSName.my.fctrOpEd#Opinion#
## 0.7839563477
## NDSSName.my.fctrScience#Health#
## 0.8268022814
## NDSSName.my.fctrStyles##Fashion
## -0.0358394378
## NDSSName.my.fctrStyles#U.S.#
## 0.6505903792
## NDSSName.my.fctrTStyle##
## -0.0944336014
## PubDate.day.minutes.poly.1
## 6.1968659740
## PubDate.day.minutes.poly.2
## 2.7526995782
## PubDate.day.minutes.poly.4
## 1.0149860620
## PubDate.hour.fctr(15.3,23]
## 0.0158660084
## PubDate.last2.log1p
## 0.0049039456
## PubDate.last4.log1p
## 0.0020941831
## PubDate.wkend
## 0.1019986663
## WordCount.log1p
## 0.1377845090
## WordCount.root2
## 0.0237638430
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg
## 1.1107203605
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg
## -4.0365793853
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg
## 1.3719600079
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg
## 4.5046664811
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg
## 0.5423083981
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg
## 0.1741186123
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg
## 1.8785299901
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg
## -0.0058299785
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg
## 0.0423465436
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg
## -0.0029370848
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg
## 0.0464038721
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg
## -0.0027331891
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg
## 0.0460673328
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg
## 0.0311476654
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg
## -0.0007315801
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg
## 0.0237102812
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg
## -0.0063533780
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg
## -0.0085878059
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg
## 0.0315807539
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg
## -0.0047405213
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg
## 0.0650963217
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg
## 0.0083187966
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg
## -0.0038312213
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg
## 0.0671501716
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg
## 0.0531800939
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg
## -0.0015350453
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg
## 0.0595488082
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg
## -0.0091683893
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg
## -0.0057844214
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg
## 0.0428941930
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg
## -0.0016995856
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg
## 0.0400648704
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg
## -0.0003591814
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg
## 0.0398657040
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg
## 0.0302335606
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg
## 0.0209878355
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg
## -0.0052426821
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg
## -0.0079293622
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg
## 0.0335177936
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg
## -0.0045073437
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg
## 0.0608649650
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg
## 0.0046654736
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg
## -0.0037544961
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg
## 0.0592756628
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg
## 0.0399694782
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg
## -0.0013542140
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg
## 0.0532508977
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg
## -0.0079277443
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg
## -0.0074876245
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg
## 0.0398496916
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg
## -0.0036507906
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg
## 0.0466991152
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg
## 0.0028638844
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg
## -0.0036938119
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg
## 0.0529339275
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg
## 0.0380275686
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg
## -0.0014099131
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg
## 0.0358835221
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg
## -0.0073495332
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -4.049609531
## NDSSName.my.fctr#Opinion#RoomForDebate
## -0.121989095
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 0.704262140
## NDSSName.my.fctr#U.S.#Education
## -0.063257524
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 0.677002877
## NDSSName.my.fctrBusiness#Technology#
## 0.028315897
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.050716705
## NDSSName.my.fctrOpEd#Opinion#
## 0.807935142
## NDSSName.my.fctrScience#Health#
## 0.862376733
## NDSSName.my.fctrStyles##Fashion
## -0.049433468
## NDSSName.my.fctrStyles#U.S.#
## 0.676296513
## NDSSName.my.fctrTStyle##
## -0.101555851
## PubDate.day.minutes.poly.1
## 6.611758257
## PubDate.day.minutes.poly.2
## 3.208695999
## PubDate.day.minutes.poly.4
## 1.326220683
## PubDate.hour.fctr(15.3,23]
## 0.020102410
## PubDate.last2.log1p
## 0.007238448
## PubDate.last4.log1p
## 0.004415541
## PubDate.wkday.fctr5
## -0.003012328
## PubDate.wkend
## 0.115562490
## WordCount.log1p
## 0.145706770
## WordCount.root2
## 0.025132374
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg
## 1.376206087
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg
## -5.504099804
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg
## 1.588832251
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg
## 5.291960917
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg
## 0.815535466
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg
## 0.465223110
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.5.ctg
## 1.318496131
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg
## 2.817439917
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg
## -0.007521570
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg
## 0.043782452
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg
## -0.003638768
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg
## 0.047023523
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg
## -0.003570344
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg
## 0.046510695
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg
## 0.031064783
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg
## -0.001725633
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg
## 0.023208838
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg
## -0.006858960
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg
## -0.010783785
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg
## 0.032428051
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg
## -0.005690303
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg
## 0.066411284
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg
## 0.009998689
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg
## -0.004894916
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg
## 0.068521976
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg
## 0.054480105
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg
## -0.002798763
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg
## 0.061667821
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg
## -0.009898408
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg
## -0.007409073
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg
## 0.044412471
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg
## -0.002351539
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg
## 0.040393133
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg
## -0.001082352
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg
## 0.040033297
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg
## 0.030162125
## NDSSName.my.fctrStyles##Fashion:PubDate.last32.log1p.ctg
## -0.000410508
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg
## 0.020440278
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg
## -0.005660356
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg
## -0.009939087
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg
## 0.034448920
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg
## -0.005376182
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg
## 0.062117169
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg
## 0.005947848
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg
## -0.004732169
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg
## 0.060222943
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg
## 0.040229359
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg
## -0.002471088
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg
## 0.054940484
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg
## -0.008547698
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg
## -0.009359864
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg
## 0.041065851
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg
## -0.004400494
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg
## 0.047084797
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg
## 0.003959960
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg
## -0.004625690
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg
## 0.053705399
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg
## 0.038354862
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg
## -0.002455757
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg
## 0.036241983
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg
## -0.007927033
## Prediction
## Reference N Y
## N 3796 145
## Y 182 681
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.319317e-01 7.651253e-01 9.244394e-01 9.388938e-01 8.203580e-01
## AccuracyPValue McnemarPValue
## 2.593356e-112 4.650238e-02
## Prediction
## Reference N Y
## N 1180 318
## Y 74 156
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.731481e-01 3.215874e-01 7.526580e-01 7.927064e-01 8.668981e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 1.259277e-34
## id
## 1 All.X##rcv#glmnet
## feats
## 1 WordCount.root2,WordCount.log1p,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.hour.fctr,PubDate.wkend,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.2,PubDate.last4.log1p,PubDate.last2.log1p,PubDate.last8.log1p,PubDate.last16.log1p,PubDate.day.minutes.poly.3,PubDate.month.fctr,PubDate.juliandate,.rnorm,PubDate.last32.log1p,PubDate.date.fctr,PubDate.second.fctr,PubDate.minute.fctr,PubDate.wkday.fctr,WordCount.nexp,PubDate.day.minutes.poly.5,NDSSName.my.fctr:PubDate.last32.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.3.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.4.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.5.ctg,NDSSName.my.fctr:PubDate.last16.log1p.ctg,NDSSName.my.fctr:PubDate.last2.log1p.ctg,NDSSName.my.fctr:PubDate.last4.log1p.ctg,NDSSName.my.fctr:PubDate.last8.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.2.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.1.ctg
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 5 69.676 4.917
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.8620369 0.9662522 0.7578216 0.9591448
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.3 0.8063943 0.9270754
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9244394 0.9388938 0.7426728
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB max.AUCROCR.OOB
## 1 0.5930603 0.9165554 0.2695652 0.8075492
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.1 0.4431818 0.7731481
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.752658 0.7927064 0.3215874
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.005144973 0.01939759
# Check if other preProcess methods improve model performance.
# Log the start of the "preProc" minor step in this stage's chunk table
# (major step counter is left unchanged).
fit.models_1_chunk_df <- myadd_chunk(
    fit.models_1_chunk_df,
    "fit.models_1_preProc",
    major.inc = FALSE,
    label.minor = "preProc"
)
## label step_major step_minor label_minor bgn end
## 3 fit.models_1_All.X 1 2 glmnet 258.688 336.84
## 4 fit.models_1_preProc 1 3 preProc 336.841 NA
## elapsed
## 3 78.153
## 4 NA
# Select the currently best model: first row of glb_models_df once it is
# ordered by the model-selection formula.
mdl_id <- orderBy(get_model_sel_frmla(), glb_models_df)[1, "id"]

# The "feats" column stores the predictors as one comma-separated string;
# split it back into a trimmed character vector.
indep_vars_vctr <- trim(unlist(strsplit(
    glb_models_df[glb_models_df$id == mdl_id, "feats"], "[,]")))

# Decompose the model id into its prefix and its fitting method.
# Ids in glb_models_df are "#"-delimited with the method last, e.g.
# "All.X##rcv#glmnet" or "Max.cor.Y.rcv.1X1.cp.0###rpart". Splitting such an
# id on "." (as was done previously) gives method "X##rcv#glmnet" and prefix
# "All", neither of which is usable. Ids that contain no "#" keep the legacy
# "<prefix>.<method>" interpretation.
# NOTE(review): the first "#" field is taken to be the id prefix and the last
# the train method, based on the printed ids -- confirm against myfit_mdl.
if (grepl("#", mdl_id, fixed = TRUE)) {
    mdl_id_parts <- unlist(strsplit(mdl_id, "#", fixed = TRUE))
    method <- tail(mdl_id_parts, 1)
    mdl_id_pfx <- mdl_id_parts[1]
} else {
    mdl_id_parts <- unlist(strsplit(mdl_id, "[.]"))
    method <- tail(mdl_id_parts, 1)
    mdl_id_pfx <- paste0(head(mdl_id_parts, -1), collapse = ".")
}

# Fit on all fit observations unless outlier ids are registered for this
# model prefix, in which case those rows are excluded.
if (!is.null(glbObsFitOutliers[[mdl_id_pfx]])) {
    fitobs_df <- glbObsFit[!(glbObsFit[, glb_id_var] %in%
                             glbObsFitOutliers[[mdl_id_pfx]]), ]
} else {
    fitobs_df <- glbObsFit
}
# Refit the selected model once for every entry of glb_preproc_methods,
# passing that entry as the train preProcess setting. Nothing runs when
# glb_preproc_methods is empty.
for (prePr in glb_preproc_methods) {
    # The operations are applied in this order:
    #   Box-Cox/Yeo-Johnson transformation, centering, scaling, range,
    #   imputation, PCA, ICA then spatial sign.
    ret_lst <- myfit_mdl(
        mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
            id.prefix = mdl_id_pfx,
            type = glb_model_type,
            tune.df = glb_tune_models_df,
            trainControl.method = "repeatedcv",
            trainControl.number = glb_rcv_n_folds,
            trainControl.repeats = glb_rcv_n_repeats,
            trainControl.classProbs = glb_is_classification,
            trainControl.summaryFunction = glbMdlMetricSummaryFn,
            train.metric = glbMdlMetricSummary,
            train.maximize = glbMdlMetricMaximize,
            train.method = method,
            train.preProcess = prePr
        )),
        indep_vars = indep_vars_vctr,
        rsp_var = glb_rsp_var,
        fit_df = fitobs_df,
        OOB_df = glbObsOOB
    )
}
# If (All|RFE).X.glm is less accurate than Low.Cor.X.glm
# check NA coefficients & filter appropriate terms in indep_vars_vctr
# if (method == "glm") {
# orig_glm <- glb_models_lst[[paste0(mdl_id, ".", model_method)]]$finalModel
# orig_glm <- glb_models_lst[["All.X.glm"]]$finalModel; print(summary(orig_glm))
# orig_glm <- glb_models_lst[["RFE.X.glm"]]$finalModel; print(summary(orig_glm))
# require(car)
# vif_orig_glm <- vif(orig_glm); print(vif_orig_glm)
# # if vif errors out with "there are aliased coefficients in the model"
# alias_orig_glm <- alias(orig_glm); alias_complete_orig_glm <- (alias_orig_glm$Complete > 0); alias_complete_orig_glm <- alias_complete_orig_glm[rowSums(alias_complete_orig_glm) > 0, colSums(alias_complete_orig_glm) > 0]; print(alias_complete_orig_glm)
# print(vif_orig_glm[!is.na(vif_orig_glm) & (vif_orig_glm == Inf)])
# print(which.max(vif_orig_glm))
# print(sort(vif_orig_glm[vif_orig_glm >= 1.0e+03], decreasing=TRUE))
# glbObsFit[c(1143, 3637, 3953, 4105), c("UniqueID", "Popular", "H.P.quandary", "Headline")]
# glb_feats_df[glb_feats_df$id %in% grep("[HSA]\\.chrs.n.log", glb_feats_df$id, value=TRUE) | glb_feats_df$cor.high.X %in% grep("[HSA]\\.chrs.n.log", glb_feats_df$id, value=TRUE), ]
# all.equal(glbObsAll$S.chrs.uppr.n.log, glbObsAll$A.chrs.uppr.n.log)
# cor(glbObsAll$S.T.herald, glbObsAll$S.T.tribun)
# mydspObs(Abstract.contains="[Dd]iar", cols=("Abstract"), all=TRUE)
# subset(glb_feats_df, cor.y.abs <= glb_feats_df[glb_feats_df$id == ".rnorm", "cor.y.abs"])
# corxx_mtrx <- cor(data.matrix(glbObsAll[, setdiff(names(glbObsAll), myfind_chr_cols_df(glbObsAll))]), use="pairwise.complete.obs"); abs_corxx_mtrx <- abs(corxx_mtrx); diag(abs_corxx_mtrx) <- 0
# which.max(abs_corxx_mtrx["S.T.tribun", ])
# abs_corxx_mtrx["A.npnct08.log", "S.npnct08.log"]
# step_glm <- step(orig_glm)
# }
# Since caret does not optimize rpart well
# if (method == "rpart")
# ret_lst <- myfit_mdl(mdl_id=paste0(mdl_id_pfx, ".cp.0"), model_method=method,
# indep_vars_vctr=indep_vars_vctr,
# model_type=glb_model_type,
# rsp_var=glb_rsp_var,
# fit_df=glbObsFit, OOB_df=glbObsOOB,
# n_cv_folds=0, tune_models_df=data.frame(parameter="cp", min=0.0, max=0.0, by=0.1))
# User specified
# Ensure at least 2 vars in each regression; else varImp crashes
# sav_models_lst <- glb_models_lst; sav_models_df <- glb_models_df; sav_featsimp_df <- glb_featsimp_df; all.equal(sav_featsimp_df, glb_featsimp_df)
# glb_models_lst <- sav_models_lst; glb_models_df <- sav_models_df; glb_featsimp_df <- sav_featsimp_df
# easier to exclude features
# require(gdata) # needed for trim
# mdl_id <- "";
# indep_vars_vctr <- head(subset(glb_models_df, grepl("All\\.X\\.", mdl_id), select=feats)
# , 1)[, "feats"]
# indep_vars_vctr <- trim(unlist(strsplit(indep_vars_vctr, "[,]")))
# indep_vars_vctr <- setdiff(indep_vars_vctr, ".rnorm")
# easier to include features
#stop("here"); sav_models_df <- glb_models_df; glb_models_df <- sav_models_df
# !_sp
# mdl_id <- "csm"; indep_vars_vctr <- c(NULL
# ,"prdline.my.fctr", "prdline.my.fctr:.clusterid.fctr"
# ,"prdline.my.fctr*biddable"
# #,"prdline.my.fctr*startprice.log"
# #,"prdline.my.fctr*startprice.diff"
# ,"prdline.my.fctr*condition.fctr"
# ,"prdline.my.fctr*D.terms.post.stop.n"
# #,"prdline.my.fctr*D.terms.post.stem.n"
# ,"prdline.my.fctr*cellular.fctr"
# # ,"<feat1>:<feat2>"
# )
# for (method in glbMdlMethods) {
# ret_lst <- myfit_mdl(mdl_id=mdl_id, model_method=method,
# indep_vars_vctr=indep_vars_vctr,
# model_type=glb_model_type,
# rsp_var=glb_rsp_var,
# fit_df=glbObsFit, OOB_df=glbObsOOB,
# n_cv_folds=glb_rcv_n_folds, tune_models_df=glb_tune_models_df)
# csm_mdl_id <- paste0(mdl_id, ".", method)
# csm_featsimp_df <- myget_feats_importance(glb_models_lst[[paste0(mdl_id, ".",
# method)]]); print(head(csm_featsimp_df))
# }
###
# Ntv.1.lm <- lm(reformulate(indep_vars_vctr, glb_rsp_var), glbObsTrn); print(summary(Ntv.1.lm))
#glb_models_df[, "max.Accuracy.OOB", FALSE]
#varImp(glb_models_lst[["Low.cor.X.glm"]])
#orderBy(~ -Overall, varImp(glb_models_lst[["All.X.2.glm"]])$imp)
#orderBy(~ -Overall, varImp(glb_models_lst[["All.X.3.glm"]])$imp)
#glb_feats_df[grepl("npnct28", glb_feats_df$id), ]
# User specified bivariate models
# indep_vars_vctr_lst <- list()
# for (feat in setdiff(names(glbObsFit),
# union(glb_rsp_var, glbFeatsExclude)))
# indep_vars_vctr_lst[[feat]] <- feat
# User specified combinatorial models
# indep_vars_vctr_lst <- list()
# combn_mtrx <- combn(c("<feat1_name>", "<feat2_name>", "<featn_name>"),
# <num_feats_to_choose>)
# for (combn_ix in 1:ncol(combn_mtrx))
# #print(combn_mtrx[, combn_ix])
# indep_vars_vctr_lst[[combn_ix]] <- combn_mtrx[, combn_ix]
# template for myfit_mdl
# rf is hard-coded in caret to recognize only Accuracy / Kappa evaluation metrics
# only for OOB in trainControl ?
# ret_lst <- myfit_mdl_fn(mdl_id=paste0(mdl_id_pfx, ""), model_method=method,
# indep_vars_vctr=indep_vars_vctr,
# rsp_var=glb_rsp_var,
# fit_df=glbObsFit, OOB_df=glbObsOOB,
# n_cv_folds=glb_rcv_n_folds, tune_models_df=glb_tune_models_df,
# model_loss_mtrx=glbMdlMetric_terms,
# model_summaryFunction=glbMdlMetricSummaryFn,
# model_metric=glbMdlMetricSummary,
# model_metric_maximize=glbMdlMetricMaximize)
# Simplify a model
# fit_df <- glbObsFit; glb_mdl <- step(<complex>_mdl)
# Non-caret models
# rpart_area_mdl <- rpart(reformulate("Area", response=glb_rsp_var),
# data=glbObsFit, #method="class",
# control=rpart.control(cp=0.12),
# parms=list(loss=glbMdlMetric_terms))
# print("rpart_sel_wlm_mdl"); prp(rpart_sel_wlm_mdl)
#
# Print the full model comparison table accumulated so far.
print(glb_models_df)
## id
## MFO###myMFO_classfr MFO###myMFO_classfr
## Random###myrandom_classfr Random###myrandom_classfr
## Max.cor.Y.rcv.1X1###glmnet Max.cor.Y.rcv.1X1###glmnet
## Max.cor.Y.rcv.3X1##rcv#glmnet Max.cor.Y.rcv.3X1##rcv#glmnet
## Max.cor.Y.rcv.3X3##rcv#glmnet Max.cor.Y.rcv.3X3##rcv#glmnet
## Max.cor.Y.rcv.3X5##rcv#glmnet Max.cor.Y.rcv.3X5##rcv#glmnet
## Max.cor.Y.rcv.5X1##rcv#glmnet Max.cor.Y.rcv.5X1##rcv#glmnet
## Max.cor.Y.rcv.5X3##rcv#glmnet Max.cor.Y.rcv.5X3##rcv#glmnet
## Max.cor.Y.rcv.5X5##rcv#glmnet Max.cor.Y.rcv.5X5##rcv#glmnet
## Max.cor.Y.rcv.1X1.cp.0###rpart Max.cor.Y.rcv.1X1.cp.0###rpart
## Max.cor.Y##rcv#rpart Max.cor.Y##rcv#rpart
## Max.cor.Y.TmSrs.poly##rcv#glmnet Max.cor.Y.TmSrs.poly##rcv#glmnet
## Interact.High.cor.Y##rcv#glmnet Interact.High.cor.Y##rcv#glmnet
## Low.cor.X##rcv#glmnet Low.cor.X##rcv#glmnet
## All.X##rcv#glmnet All.X##rcv#glmnet
## feats
## MFO###myMFO_classfr .rnorm
## Random###myrandom_classfr .rnorm
## Max.cor.Y.rcv.1X1###glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.3X1##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.3X3##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.3X5##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.5X1##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.5X3##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.5X5##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.1X1.cp.0###rpart WordCount.root2,NDSSName.my.fctr
## Max.cor.Y##rcv#rpart WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.TmSrs.poly##rcv#glmnet WordCount.root2,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.day.minutes.poly.2,PubDate.day.minutes.poly.3,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.5,PubDate.day.minutes.poly.1.ctg,PubDate.day.minutes.poly.2.ctg,PubDate.day.minutes.poly.3.ctg,PubDate.day.minutes.poly.4.ctg,PubDate.day.minutes.poly.5.ctg
## Interact.High.cor.Y##rcv#glmnet WordCount.root2,NDSSName.my.fctr,WordCount.root2:WordCount.root2,WordCount.root2:PubDate.day.minutes.poly.1,WordCount.root2:PubDate.last4.log1p,WordCount.root2:PubDate.month.fctr
## Low.cor.X##rcv#glmnet WordCount.root2,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.wkend,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.2,PubDate.last4.log1p,PubDate.last16.log1p,PubDate.day.minutes.poly.3,PubDate.month.fctr,.rnorm,PubDate.last32.log1p,PubDate.date.fctr,PubDate.second.fctr,PubDate.minute.fctr,PubDate.wkday.fctr,WordCount.nexp,PubDate.day.minutes.poly.5,NDSSName.my.fctr:PubDate.last32.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.3.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.4.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.5.ctg,NDSSName.my.fctr:PubDate.last16.log1p.ctg,NDSSName.my.fctr:PubDate.last2.log1p.ctg,NDSSName.my.fctr:PubDate.last4.log1p.ctg,NDSSName.my.fctr:PubDate.last8.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.2.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.1.ctg
## All.X##rcv#glmnet WordCount.root2,WordCount.log1p,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.hour.fctr,PubDate.wkend,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.2,PubDate.last4.log1p,PubDate.last2.log1p,PubDate.last8.log1p,PubDate.last16.log1p,PubDate.day.minutes.poly.3,PubDate.month.fctr,PubDate.juliandate,.rnorm,PubDate.last32.log1p,PubDate.date.fctr,PubDate.second.fctr,PubDate.minute.fctr,PubDate.wkday.fctr,WordCount.nexp,PubDate.day.minutes.poly.5,NDSSName.my.fctr:PubDate.last32.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.3.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.4.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.5.ctg,NDSSName.my.fctr:PubDate.last16.log1p.ctg,NDSSName.my.fctr:PubDate.last2.log1p.ctg,NDSSName.my.fctr:PubDate.last4.log1p.ctg,NDSSName.my.fctr:PubDate.last8.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.2.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.1.ctg
## max.nTuningRuns
## MFO###myMFO_classfr 0
## Random###myrandom_classfr 0
## Max.cor.Y.rcv.1X1###glmnet 0
## Max.cor.Y.rcv.3X1##rcv#glmnet 25
## Max.cor.Y.rcv.3X3##rcv#glmnet 25
## Max.cor.Y.rcv.3X5##rcv#glmnet 25
## Max.cor.Y.rcv.5X1##rcv#glmnet 25
## Max.cor.Y.rcv.5X3##rcv#glmnet 25
## Max.cor.Y.rcv.5X5##rcv#glmnet 25
## Max.cor.Y.rcv.1X1.cp.0###rpart 0
## Max.cor.Y##rcv#rpart 5
## Max.cor.Y.TmSrs.poly##rcv#glmnet 25
## Interact.High.cor.Y##rcv#glmnet 25
## Low.cor.X##rcv#glmnet 5
## All.X##rcv#glmnet 5
## min.elapsedtime.everything
## MFO###myMFO_classfr 0.292
## Random###myrandom_classfr 0.302
## Max.cor.Y.rcv.1X1###glmnet 1.010
## Max.cor.Y.rcv.3X1##rcv#glmnet 2.476
## Max.cor.Y.rcv.3X3##rcv#glmnet 4.546
## Max.cor.Y.rcv.3X5##rcv#glmnet 6.409
## Max.cor.Y.rcv.5X1##rcv#glmnet 3.311
## Max.cor.Y.rcv.5X3##rcv#glmnet 6.029
## Max.cor.Y.rcv.5X5##rcv#glmnet 9.050
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.870
## Max.cor.Y##rcv#rpart 2.935
## Max.cor.Y.TmSrs.poly##rcv#glmnet 5.181
## Interact.High.cor.Y##rcv#glmnet 5.038
## Low.cor.X##rcv#glmnet 72.242
## All.X##rcv#glmnet 69.676
## min.elapsedtime.final max.AUCpROC.fit
## MFO###myMFO_classfr 0.003 0.5000000
## Random###myrandom_classfr 0.002 0.4990604
## Max.cor.Y.rcv.1X1###glmnet 0.273 0.8790544
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.271 0.8767919
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.270 0.8767919
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.272 0.8767919
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.267 0.8784031
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.268 0.8784031
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.270 0.8784031
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.070 0.8821543
## Max.cor.Y##rcv#rpart 0.073 0.8709432
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.353 0.8748550
## Interact.High.cor.Y##rcv#glmnet 0.322 0.8776419
## Low.cor.X##rcv#glmnet 4.675 0.8624894
## All.X##rcv#glmnet 4.917 0.8620369
## max.Sens.fit max.Spec.fit max.AUCROCR.fit
## MFO###myMFO_classfr 1.0000000 0.0000000 0.5000000
## Random###myrandom_classfr 0.8312611 0.1668598 0.4972757
## Max.cor.Y.rcv.1X1###glmnet 0.9632073 0.7949015 0.9608594
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.9644760 0.7891078 0.9582555
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.9644760 0.7891078 0.9582555
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.9644760 0.7891078 0.9582555
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.9642223 0.7925840 0.9607052
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.9642223 0.7925840 0.9607052
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.9642223 0.7925840 0.9607052
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.9705658 0.7937428 0.9504198
## Max.cor.Y##rcv#rpart 0.9632073 0.7786790 0.8746354
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.9652372 0.7844728 0.9565422
## Interact.High.cor.Y##rcv#glmnet 0.9626998 0.7925840 0.9625372
## Low.cor.X##rcv#glmnet 0.9659985 0.7589803 0.9588640
## All.X##rcv#glmnet 0.9662522 0.7578216 0.9591448
## opt.prob.threshold.fit max.f.score.fit
## MFO###myMFO_classfr 0.1 0.3045703
## Random###myrandom_classfr 0.1 0.3045703
## Max.cor.Y.rcv.1X1###glmnet 0.5 0.8099174
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.4 0.8099174
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.4 0.8099174
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.4 0.8099174
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.5 0.8104265
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.5 0.8104265
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.5 0.8104265
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.4 0.8235294
## Max.cor.Y##rcv#rpart 0.6 0.8000000
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.4 0.8099174
## Interact.High.cor.Y##rcv#glmnet 0.4 0.8084359
## Low.cor.X##rcv#glmnet 0.2 0.8077374
## All.X##rcv#glmnet 0.3 0.8063943
## max.Accuracy.fit max.AccuracyLower.fit
## MFO###myMFO_classfr 0.1796420 0.1688795
## Random###myrandom_classfr 0.1796420 0.1688795
## Max.cor.Y.rcv.1X1###glmnet 0.9329725 0.9255302
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.9335973 0.9255302
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.9333193 0.9255302
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.9332218 0.9255302
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.9331818 0.9259666
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.9333905 0.9259666
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.9331816 0.9259666
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.9381765 0.9309917
## Max.cor.Y##rcv#rpart 0.9296422 0.9224771
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.9323484 0.9255302
## Interact.High.cor.Y##rcv#glmnet 0.9315850 0.9244394
## Low.cor.X##rcv#glmnet 0.9276303 0.9242213
## All.X##rcv#glmnet 0.9270754 0.9244394
## max.AccuracyUpper.fit max.Kappa.fit
## MFO###myMFO_classfr 0.1907952 0.0000000
## Random###myrandom_classfr 0.1907952 0.0000000
## Max.cor.Y.rcv.1X1###glmnet 0.9398832 0.7692476
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.9398832 0.7691678
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.9398832 0.7690803
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.9398832 0.7686375
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.9402789 0.7689055
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.9402789 0.7698577
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.9402789 0.7691429
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.9448229 0.7860827
## Max.cor.Y##rcv#rpart 0.9371115 0.7515134
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.9398832 0.7646741
## Interact.High.cor.Y##rcv#glmnet 0.9388938 0.7641040
## Low.cor.X##rcv#glmnet 0.9386958 0.7453708
## All.X##rcv#glmnet 0.9388938 0.7426728
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB
## MFO###myMFO_classfr 0.5000000 1.0000000 0.0000000
## Random###myrandom_classfr 0.5125675 0.8077437 0.2173913
## Max.cor.Y.rcv.1X1###glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.6174697 0.9218959 0.3130435
## Max.cor.Y##rcv#rpart 0.5870523 0.9045394 0.2695652
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.5965780 0.9105474 0.2826087
## Interact.High.cor.Y##rcv#glmnet 0.6009259 0.9105474 0.2913043
## Low.cor.X##rcv#glmnet 0.5917252 0.9138852 0.2695652
## All.X##rcv#glmnet 0.5930603 0.9165554 0.2695652
## max.AUCROCR.OOB opt.prob.threshold.OOB
## MFO###myMFO_classfr 0.5000000 0.1
## Random###myrandom_classfr 0.4857956 0.1
## Max.cor.Y.rcv.1X1###glmnet 0.8116126 0.1
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.8067975 0.1
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.8067975 0.1
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.8067975 0.1
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.8114863 0.1
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.8114863 0.1
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.8114863 0.1
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.7773858 0.1
## Max.cor.Y##rcv#rpart 0.5892132 0.6
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.8049472 0.1
## Interact.High.cor.Y##rcv#glmnet 0.8140971 0.1
## Low.cor.X##rcv#glmnet 0.8052766 0.1
## All.X##rcv#glmnet 0.8075492 0.1
## max.f.score.OOB max.Accuracy.OOB
## MFO###myMFO_classfr 0.2349336 0.1331019
## Random###myrandom_classfr 0.2349336 0.1331019
## Max.cor.Y.rcv.1X1###glmnet 0.4405405 0.7604167
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.4375839 0.7575231
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.4375839 0.7575231
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.4375839 0.7575231
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.4609375 0.7604167
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.4609375 0.7604167
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.4609375 0.7604167
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.4207493 0.7673611
## Max.cor.Y##rcv#rpart 0.2850575 0.8200231
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.4441261 0.7754630
## Interact.High.cor.Y##rcv#glmnet 0.4398340 0.7656250
## Low.cor.X##rcv#glmnet 0.4152672 0.7783565
## All.X##rcv#glmnet 0.4431818 0.7731481
## max.AccuracyLower.OOB
## MFO###myMFO_classfr 0.1174298
## Random###myrandom_classfr 0.1174298
## Max.cor.Y.rcv.1X1###glmnet 0.7395703
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.7365992
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.7365992
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.7365992
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.7395703
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.7395703
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.7395703
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.7467059
## Max.cor.Y##rcv#rpart 0.8010821
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.7550404
## Interact.High.cor.Y##rcv#glmnet 0.7449213
## Low.cor.X##rcv#glmnet 0.7580195
## All.X##rcv#glmnet 0.7526580
## max.AccuracyUpper.OOB max.Kappa.OOB
## MFO###myMFO_classfr 0.1500310 0.0000000
## Random###myrandom_classfr 0.1500310 0.0000000
## Max.cor.Y.rcv.1X1###glmnet 0.7803749 0.3148374
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.7775689 0.3107477
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.7775689 0.3107477
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.7775689 0.3107477
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.7803749 0.3373693
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.7803749 0.3373693
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.7803749 0.3373693
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.7871043 0.2953321
## Max.cor.Y##rcv#rpart 0.8378705 0.1825002
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.7949457 0.3233542
## Interact.High.cor.Y##rcv#glmnet 0.7854227 0.3156027
## Low.cor.X##rcv#glmnet 0.7977437 0.2931798
## All.X##rcv#glmnet 0.7927064 0.3215874
## max.AccuracySD.fit max.KappaSD.fit
## MFO###myMFO_classfr NA NA
## Random###myrandom_classfr NA NA
## Max.cor.Y.rcv.1X1###glmnet NA NA
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.007015493 0.02403706
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.005178375 0.01754365
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.005396525 0.01835474
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.008837283 0.03133449
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.006138477 0.02161286
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.006213800 0.02210061
## Max.cor.Y.rcv.1X1.cp.0###rpart NA NA
## Max.cor.Y##rcv#rpart 0.005069520 0.01910910
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.005312169 0.01855071
## Interact.High.cor.Y##rcv#glmnet 0.005250654 0.01810996
## Low.cor.X##rcv#glmnet 0.004942454 0.01832711
## All.X##rcv#glmnet 0.005144973 0.01939759
# The last fit result is no longer needed; remove it from the workspace.
rm(list = "ret_lst")
# Log the "teardown" minor step that closes this stage's chunk table.
fit.models_1_chunk_df <- myadd_chunk(
    fit.models_1_chunk_df,
    "fit.models_1_end",
    major.inc = FALSE,
    label.minor = "teardown"
)
## label step_major step_minor label_minor bgn end
## 4 fit.models_1_preProc 1 3 preProc 336.841 336.91
## 5 fit.models_1_end 1 4 teardown 336.911 NA
## elapsed
## 4 0.069
## 5 NA
# Add another "fit.models" entry to the global chunk table without
# incrementing the major step.
glb_chunks_df <- myadd_chunk(
    glb_chunks_df,
    "fit.models",
    major.inc = FALSE
)
## label step_major step_minor label_minor bgn end elapsed
## 11 fit.models 6 1 1 246.208 336.92 90.713
## 12 fit.models 6 2 2 336.921 NA NA
# Start a new chunk table (NULL = no existing table) for the second
# fit.models stage, beginning with its "setup" step.
fit.models_2_chunk_df <- myadd_chunk(
    NULL,
    "fit.models_2_bgn",
    label.minor = "setup"
)
## label step_major step_minor label_minor bgn end elapsed
## 1 fit.models_2_bgn 1 0 setup 338.597 NA NA
# Build the plotting copy of the model table.
# Step 1: drop the SD / confidence-bound columns. A logical mask is used
# instead of `-grep(...)`: with no matches `-integer(0)` selects zero columns
# and would silently drop the whole table. `drop = FALSE` keeps the result a
# data frame even if a single column remains.
plt_models_df <- glb_models_df[, !grepl("SD|Upper|Lower", names(glb_models_df)),
                               drop = FALSE]

# Step 2: replace every "min.<metric>" column by its reciprocal, stored as
# "inv.<metric>" (presumably so that all plotted metrics are larger-is-better
# -- TODO confirm with the plotting code). The dot is escaped and the pattern
# anchored so only the literal "min." prefix is matched and rewritten.
for (var in grep("^min\\.", names(plt_models_df), value = TRUE)) {
    plt_models_df[, sub("^min\\.", "inv.", var)] <-
        #ifelse(all(is.na(tmp <- plt_models_df[, var])), NA, 1.0 / tmp)
        1.0 / plt_models_df[, var]
    # Remove exactly the source column; matching by name avoids treating the
    # column name as a regular expression.
    plt_models_df <- plt_models_df[, names(plt_models_df) != var, drop = FALSE]
}
print(plt_models_df)
## id
## MFO###myMFO_classfr MFO###myMFO_classfr
## Random###myrandom_classfr Random###myrandom_classfr
## Max.cor.Y.rcv.1X1###glmnet Max.cor.Y.rcv.1X1###glmnet
## Max.cor.Y.rcv.3X1##rcv#glmnet Max.cor.Y.rcv.3X1##rcv#glmnet
## Max.cor.Y.rcv.3X3##rcv#glmnet Max.cor.Y.rcv.3X3##rcv#glmnet
## Max.cor.Y.rcv.3X5##rcv#glmnet Max.cor.Y.rcv.3X5##rcv#glmnet
## Max.cor.Y.rcv.5X1##rcv#glmnet Max.cor.Y.rcv.5X1##rcv#glmnet
## Max.cor.Y.rcv.5X3##rcv#glmnet Max.cor.Y.rcv.5X3##rcv#glmnet
## Max.cor.Y.rcv.5X5##rcv#glmnet Max.cor.Y.rcv.5X5##rcv#glmnet
## Max.cor.Y.rcv.1X1.cp.0###rpart Max.cor.Y.rcv.1X1.cp.0###rpart
## Max.cor.Y##rcv#rpart Max.cor.Y##rcv#rpart
## Max.cor.Y.TmSrs.poly##rcv#glmnet Max.cor.Y.TmSrs.poly##rcv#glmnet
## Interact.High.cor.Y##rcv#glmnet Interact.High.cor.Y##rcv#glmnet
## Low.cor.X##rcv#glmnet Low.cor.X##rcv#glmnet
## All.X##rcv#glmnet All.X##rcv#glmnet
## feats
## MFO###myMFO_classfr .rnorm
## Random###myrandom_classfr .rnorm
## Max.cor.Y.rcv.1X1###glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.3X1##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.3X3##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.3X5##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.5X1##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.5X3##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.5X5##rcv#glmnet WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.rcv.1X1.cp.0###rpart WordCount.root2,NDSSName.my.fctr
## Max.cor.Y##rcv#rpart WordCount.root2,NDSSName.my.fctr
## Max.cor.Y.TmSrs.poly##rcv#glmnet WordCount.root2,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.day.minutes.poly.2,PubDate.day.minutes.poly.3,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.5,PubDate.day.minutes.poly.1.ctg,PubDate.day.minutes.poly.2.ctg,PubDate.day.minutes.poly.3.ctg,PubDate.day.minutes.poly.4.ctg,PubDate.day.minutes.poly.5.ctg
## Interact.High.cor.Y##rcv#glmnet WordCount.root2,NDSSName.my.fctr,WordCount.root2:WordCount.root2,WordCount.root2:PubDate.day.minutes.poly.1,WordCount.root2:PubDate.last4.log1p,WordCount.root2:PubDate.month.fctr
## Low.cor.X##rcv#glmnet WordCount.root2,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.wkend,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.2,PubDate.last4.log1p,PubDate.last16.log1p,PubDate.day.minutes.poly.3,PubDate.month.fctr,.rnorm,PubDate.last32.log1p,PubDate.date.fctr,PubDate.second.fctr,PubDate.minute.fctr,PubDate.wkday.fctr,WordCount.nexp,PubDate.day.minutes.poly.5,NDSSName.my.fctr:PubDate.last32.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.3.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.4.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.5.ctg,NDSSName.my.fctr:PubDate.last16.log1p.ctg,NDSSName.my.fctr:PubDate.last2.log1p.ctg,NDSSName.my.fctr:PubDate.last4.log1p.ctg,NDSSName.my.fctr:PubDate.last8.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.2.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.1.ctg
## All.X##rcv#glmnet WordCount.root2,WordCount.log1p,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.hour.fctr,PubDate.wkend,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.2,PubDate.last4.log1p,PubDate.last2.log1p,PubDate.last8.log1p,PubDate.last16.log1p,PubDate.day.minutes.poly.3,PubDate.month.fctr,PubDate.juliandate,.rnorm,PubDate.last32.log1p,PubDate.date.fctr,PubDate.second.fctr,PubDate.minute.fctr,PubDate.wkday.fctr,WordCount.nexp,PubDate.day.minutes.poly.5,NDSSName.my.fctr:PubDate.last32.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.3.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.4.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.5.ctg,NDSSName.my.fctr:PubDate.last16.log1p.ctg,NDSSName.my.fctr:PubDate.last2.log1p.ctg,NDSSName.my.fctr:PubDate.last4.log1p.ctg,NDSSName.my.fctr:PubDate.last8.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.2.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.1.ctg
## max.nTuningRuns max.AUCpROC.fit
## MFO###myMFO_classfr 0 0.5000000
## Random###myrandom_classfr 0 0.4990604
## Max.cor.Y.rcv.1X1###glmnet 0 0.8790544
## Max.cor.Y.rcv.3X1##rcv#glmnet 25 0.8767919
## Max.cor.Y.rcv.3X3##rcv#glmnet 25 0.8767919
## Max.cor.Y.rcv.3X5##rcv#glmnet 25 0.8767919
## Max.cor.Y.rcv.5X1##rcv#glmnet 25 0.8784031
## Max.cor.Y.rcv.5X3##rcv#glmnet 25 0.8784031
## Max.cor.Y.rcv.5X5##rcv#glmnet 25 0.8784031
## Max.cor.Y.rcv.1X1.cp.0###rpart 0 0.8821543
## Max.cor.Y##rcv#rpart 5 0.8709432
## Max.cor.Y.TmSrs.poly##rcv#glmnet 25 0.8748550
## Interact.High.cor.Y##rcv#glmnet 25 0.8776419
## Low.cor.X##rcv#glmnet 5 0.8624894
## All.X##rcv#glmnet 5 0.8620369
## max.Sens.fit max.Spec.fit max.AUCROCR.fit
## MFO###myMFO_classfr 1.0000000 0.0000000 0.5000000
## Random###myrandom_classfr 0.8312611 0.1668598 0.4972757
## Max.cor.Y.rcv.1X1###glmnet 0.9632073 0.7949015 0.9608594
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.9644760 0.7891078 0.9582555
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.9644760 0.7891078 0.9582555
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.9644760 0.7891078 0.9582555
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.9642223 0.7925840 0.9607052
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.9642223 0.7925840 0.9607052
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.9642223 0.7925840 0.9607052
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.9705658 0.7937428 0.9504198
## Max.cor.Y##rcv#rpart 0.9632073 0.7786790 0.8746354
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.9652372 0.7844728 0.9565422
## Interact.High.cor.Y##rcv#glmnet 0.9626998 0.7925840 0.9625372
## Low.cor.X##rcv#glmnet 0.9659985 0.7589803 0.9588640
## All.X##rcv#glmnet 0.9662522 0.7578216 0.9591448
## opt.prob.threshold.fit max.f.score.fit
## MFO###myMFO_classfr 0.1 0.3045703
## Random###myrandom_classfr 0.1 0.3045703
## Max.cor.Y.rcv.1X1###glmnet 0.5 0.8099174
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.4 0.8099174
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.4 0.8099174
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.4 0.8099174
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.5 0.8104265
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.5 0.8104265
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.5 0.8104265
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.4 0.8235294
## Max.cor.Y##rcv#rpart 0.6 0.8000000
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.4 0.8099174
## Interact.High.cor.Y##rcv#glmnet 0.4 0.8084359
## Low.cor.X##rcv#glmnet 0.2 0.8077374
## All.X##rcv#glmnet 0.3 0.8063943
## max.Accuracy.fit max.Kappa.fit
## MFO###myMFO_classfr 0.1796420 0.0000000
## Random###myrandom_classfr 0.1796420 0.0000000
## Max.cor.Y.rcv.1X1###glmnet 0.9329725 0.7692476
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.9335973 0.7691678
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.9333193 0.7690803
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.9332218 0.7686375
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.9331818 0.7689055
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.9333905 0.7698577
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.9331816 0.7691429
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.9381765 0.7860827
## Max.cor.Y##rcv#rpart 0.9296422 0.7515134
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.9323484 0.7646741
## Interact.High.cor.Y##rcv#glmnet 0.9315850 0.7641040
## Low.cor.X##rcv#glmnet 0.9276303 0.7453708
## All.X##rcv#glmnet 0.9270754 0.7426728
## max.AUCpROC.OOB max.Sens.OOB max.Spec.OOB
## MFO###myMFO_classfr 0.5000000 1.0000000 0.0000000
## Random###myrandom_classfr 0.5125675 0.8077437 0.2173913
## Max.cor.Y.rcv.1X1###glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.5962443 0.9098798 0.2826087
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.6174697 0.9218959 0.3130435
## Max.cor.Y##rcv#rpart 0.5870523 0.9045394 0.2695652
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.5965780 0.9105474 0.2826087
## Interact.High.cor.Y##rcv#glmnet 0.6009259 0.9105474 0.2913043
## Low.cor.X##rcv#glmnet 0.5917252 0.9138852 0.2695652
## All.X##rcv#glmnet 0.5930603 0.9165554 0.2695652
## max.AUCROCR.OOB opt.prob.threshold.OOB
## MFO###myMFO_classfr 0.5000000 0.1
## Random###myrandom_classfr 0.4857956 0.1
## Max.cor.Y.rcv.1X1###glmnet 0.8116126 0.1
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.8067975 0.1
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.8067975 0.1
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.8067975 0.1
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.8114863 0.1
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.8114863 0.1
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.8114863 0.1
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.7773858 0.1
## Max.cor.Y##rcv#rpart 0.5892132 0.6
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.8049472 0.1
## Interact.High.cor.Y##rcv#glmnet 0.8140971 0.1
## Low.cor.X##rcv#glmnet 0.8052766 0.1
## All.X##rcv#glmnet 0.8075492 0.1
## max.f.score.OOB max.Accuracy.OOB
## MFO###myMFO_classfr 0.2349336 0.1331019
## Random###myrandom_classfr 0.2349336 0.1331019
## Max.cor.Y.rcv.1X1###glmnet 0.4405405 0.7604167
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.4375839 0.7575231
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.4375839 0.7575231
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.4375839 0.7575231
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.4609375 0.7604167
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.4609375 0.7604167
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.4609375 0.7604167
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.4207493 0.7673611
## Max.cor.Y##rcv#rpart 0.2850575 0.8200231
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.4441261 0.7754630
## Interact.High.cor.Y##rcv#glmnet 0.4398340 0.7656250
## Low.cor.X##rcv#glmnet 0.4152672 0.7783565
## All.X##rcv#glmnet 0.4431818 0.7731481
## max.Kappa.OOB inv.elapsedtime.everything
## MFO###myMFO_classfr 0.0000000 3.42465753
## Random###myrandom_classfr 0.0000000 3.31125828
## Max.cor.Y.rcv.1X1###glmnet 0.3148374 0.99009901
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.3107477 0.40387722
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.3107477 0.21997360
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.3107477 0.15603058
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.3373693 0.30202356
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.3373693 0.16586499
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.3373693 0.11049724
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.2953321 1.14942529
## Max.cor.Y##rcv#rpart 0.1825002 0.34071550
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.3233542 0.19301293
## Interact.High.cor.Y##rcv#glmnet 0.3156027 0.19849146
## Low.cor.X##rcv#glmnet 0.2931798 0.01384236
## All.X##rcv#glmnet 0.3215874 0.01435214
## inv.elapsedtime.final
## MFO###myMFO_classfr 333.3333333
## Random###myrandom_classfr 500.0000000
## Max.cor.Y.rcv.1X1###glmnet 3.6630037
## Max.cor.Y.rcv.3X1##rcv#glmnet 3.6900369
## Max.cor.Y.rcv.3X3##rcv#glmnet 3.7037037
## Max.cor.Y.rcv.3X5##rcv#glmnet 3.6764706
## Max.cor.Y.rcv.5X1##rcv#glmnet 3.7453184
## Max.cor.Y.rcv.5X3##rcv#glmnet 3.7313433
## Max.cor.Y.rcv.5X5##rcv#glmnet 3.7037037
## Max.cor.Y.rcv.1X1.cp.0###rpart 14.2857143
## Max.cor.Y##rcv#rpart 13.6986301
## Max.cor.Y.TmSrs.poly##rcv#glmnet 2.8328612
## Interact.High.cor.Y##rcv#glmnet 3.1055901
## Low.cor.X##rcv#glmnet 0.2139037
## All.X##rcv#glmnet 0.2033760
# Plot the per-model metrics held in plt_models_df (myplot_radar is defined
# outside this chunk; the warnings below come from its colour / shape palettes
# being asked for 15 models).
print(myplot_radar(radar_inp_df=plt_models_df))
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 15. Consider specifying shapes manually if you must have them.
## Warning: Removed 180 rows containing missing values (geom_point).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 15. Consider specifying shapes manually if you must have them.
# print(myplot_radar(radar_inp_df=subset(plt_models_df,
# !(mdl_id %in% grep("random|MFO", plt_models_df$id, value=TRUE)))))
# Compute CI for <metric>SD
# Add helper columns:
#   max.df           - degrees of freedom = tuning runs - 1 (NA when <= 1 run)
#   min.sd2ci.scaler - two-sided 95% t quantile for that df (NA when df is NA)
glb_models_df <- mutate(glb_models_df,
    max.df = ifelse(max.nTuningRuns > 1, max.nTuningRuns - 1, NA),
    min.sd2ci.scaler = ifelse(is.na(max.df), NA, qt(0.975, max.df)))
# For every "<metric>SD<suffix>" column add "<metric>Upper<suffix>" and
# "<metric>Lower<suffix>" = metric +/- scaler * SD, unless they already exist.
for (var in grep("SD", names(glb_models_df), value = TRUE)) {
    # Derive the companion column names by replacing the first literal "SD".
    # (Splitting on "SD" and pasting the pieces back produced "...NA" names
    #  for columns ending in "SD" and lost text when "SD" occurred twice.)
    varActul <- sub("SD", "", var, fixed = TRUE)
    varUpper <- sub("SD", "Upper", var, fixed = TRUE)
    varLower <- sub("SD", "Lower", var, fixed = TRUE)
    # Does CI already exist ?
    if (varUpper %in% names(glb_models_df)) {
        warning(varUpper, " already exists in glb_models_df")
        # Assuming Lower also exists
        next
    }
    print(sprintf("var:%s", var))
    # CI is dependent on sample size in t distribution; df=n-1
    glb_models_df[, varUpper] <- glb_models_df[, varActul] +
        glb_models_df[, "min.sd2ci.scaler"] * glb_models_df[, var]
    glb_models_df[, varLower] <- glb_models_df[, varActul] -
        glb_models_df[, "min.sd2ci.scaler"] * glb_models_df[, var]
}
## Warning: max.AccuracyUpper.fit already exists in glb_models_df
## [1] "var:max.KappaSD.fit"
# Plot metrics with CI
# Two frames keyed by model id are assembled from glb_models_df:
#   plt_models_df   - the metric itself for every metric that has an Upper bound
#   pltCI_models_df - the matching <metric>Upper / <metric>Lower columns
plt_models_df <- glb_models_df[, "id", drop = FALSE]
pltCI_models_df <- glb_models_df[, "id", drop = FALSE]
for (var in names(glb_models_df)[grepl("Upper", names(glb_models_df))]) {
    # Pieces of the column name on either side of "Upper"
    var_components <- strsplit(var, "Upper")[[1]]
    # Name of the plain metric column = the pieces glued back together
    col_name <- paste0(var_components, collapse = "")
    plt_models_df[, col_name] <- glb_models_df[, col_name]
    for (name in paste0(var_components[1],
                        c("Upper", "Lower"),
                        var_components[2])) {
        pltCI_models_df[, name] <- glb_models_df[, name]
    }
}
# Reshape a per-model metrics data frame into long format for plotting.
#
# Args:
#   plt_models_df: data frame with an "id" column plus one column per metric,
#                  named "<label>.<data>" (e.g. "max.Accuracy.fit").
# Returns:
#   The melt()-ed data frame (melt() is provided by a package loaded outside
#   this view) with two extra character columns derived from its `variable`
#   column:
#     data  - the text after the last "." of the metric column name
#     label - the text before that last "."
#   A name that contains no "." is used unchanged for both columns.
#
# The split is done with anchored patterns on the whole vector. The previous
# row-by-row version split on the regex paste0(".", data), whose unescaped "."
# could match earlier in the name (e.g. "max.fitness.fit" gave label "max"),
# and its 1:nrow() loop failed on a zero-row input.
build_statsCI_data <- function(plt_models_df) {
    mltd_models_df <- melt(plt_models_df, id.vars="id")
    variable_names <- as.character(mltd_models_df$variable)
    mltd_models_df$data <- sub("^.*[.]", "", variable_names)
    mltd_models_df$label <- sub("[.][^.]*$", "", variable_names)
    #print(mltd_models_df)
    return(mltd_models_df)
}
# Long-format metric values (one row per model id / metric column), with the
# `label` and `data` columns added by build_statsCI_data().
mltd_models_df <- build_statsCI_data(plt_models_df)
# Long-format confidence bounds (one row per model id / bound column).
mltdCI_models_df <- melt(pltCI_models_df, id.vars="id")
# Tag every bound row: find whether its column name contains "Upper" or "Lower"
# and split the name around that word.
for (row_ix in 1:nrow(mltdCI_models_df)) {
for (type in c("Upper", "Lower")) {
# strsplit() yields more than one piece only when `type` occurs in the name
if (length(var_components <- unlist(strsplit(
as.character(mltdCI_models_df[row_ix, "variable"]), type))) > 1) {
#print(sprintf("row_ix:%d; type:%s; ", row_ix, type))
# text before the bound word, e.g. "max.Accuracy"
mltdCI_models_df[row_ix, "label"] <- var_components[1]
# the suffix starts with ".", so its 2nd dot-separated piece is the data name
mltdCI_models_df[row_ix, "data"] <-
unlist(strsplit(var_components[2], "[.]"))[2]
mltdCI_models_df[row_ix, "type"] <- type
break
}
}
}
# Spread the Upper / Lower rows into value.Upper / value.Lower columns, one row
# per remaining key column (every column except type, value and variable).
wideCI_models_df <- reshape(subset(mltdCI_models_df, select=-variable),
timevar="type",
idvar=setdiff(names(mltdCI_models_df), c("type", "value", "variable")),
direction="wide")
#print(wideCI_models_df)
# Attach the metric value to each pair of bounds; all.x=TRUE keeps every bounds
# row even when no metric row matches on the shared columns.
mrgdCI_models_df <- merge(wideCI_models_df, mltd_models_df, all.x=TRUE)
#print(mrgdCI_models_df)
# Merge stats back in if CIs don't exist
# Every <label>.<data> combination of the labels and data sets already being
# plotted is a candidate (label-major order, as in the original nested loop).
goback_labels <- unique(mltd_models_df$label)
goback_data <- unique(mltd_models_df$data)
goback_vars <- paste(rep(goback_labels, each = length(goback_data)),
                     rep(goback_data, times = length(goback_labels)),
                     sep = ".")
# if this data is already present, skip it
goback_vars <- setdiff(goback_vars,
                       paste(mltd_models_df$label, mltd_models_df$data, sep = "."))
# Only metrics that really exist in glb_models_df can be merged back; a
# combination without a column used to stop with "undefined columns selected"
goback_vars <- intersect(goback_vars, names(glb_models_df))
if (length(goback_vars) > 0) {
    mltd_goback_df <- build_statsCI_data(glb_models_df[, c("id", goback_vars)])
    mltd_models_df <- rbind(mltd_models_df, mltd_goback_df)
}
# mltd_models_df <- merge(mltd_models_df, glb_models_df[, c("id", "model_method")],
#                         all.x=TRUE)
# Bar chart of every metric per model, faceted by metric label (rows) and data
# set (columns), with the confidence bounds drawn as error bars. The chart is
# written to <glb_out_pfx>models_bar.png.
png(paste0(glb_out_pfx, "models_bar.png"), width = 480 * 3, height = 480 * 2)
#print(gp <- myplot_bar(mltd_models_df, "id", "value", colorcol_name="model_method") +
print(gp <- myplot_bar(df = mltd_models_df, xcol_name = "id", ycol_names = "value") +
    # The error-bar layer must be positioned by the `id` column of
    # mrgdCI_models_df, matching the bars' x column. The frame has no `mdl_id`
    # column, so the former aes(x = mdl_id) could only resolve to a variable
    # of that name outside the data.
    geom_errorbar(data = mrgdCI_models_df,
        mapping = aes(x = id, ymax = value.Upper, ymin = value.Lower),
        width = 0.5) +
    facet_grid(label ~ data, scales = "free") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5)))
## Warning: Removed 4 rows containing missing values (geom_errorbar).
# Close the current graphics device (the PNG device opened for the bar chart).
dev.off()
## quartz_off_screen
## 2
# Draw the saved plot object again on the now-current device.
print(gp)
## Warning: Removed 4 rows containing missing values (geom_errorbar).
# Columns of the model comparison table: the model id, whichever evaluation
# metrics from glbMdlMetricsEval are present in glb_models_df, and every column
# whose name contains the literal text "opt.".
dsp_models_cols <- c(
    "id",
    glbMdlMetricsEval[is.element(glbMdlMetricsEval, names(glb_models_df))],
    grep("opt.", names(glb_models_df), fixed = TRUE, value = TRUE)
)
# if (glb_is_classification && glb_is_binomial)
#     dsp_models_cols <- c(dsp_models_cols, "opt.prob.threshold.OOB")
# Rank the models with the selection formula, keep the display columns, print.
dsp_models_df <-
    orderBy(get_model_sel_frmla(), glb_models_df)[, dsp_models_cols]
print(dsp_models_df)
## id
## Max.cor.Y##rcv#rpart Max.cor.Y##rcv#rpart
## Low.cor.X##rcv#glmnet Low.cor.X##rcv#glmnet
## Max.cor.Y.TmSrs.poly##rcv#glmnet Max.cor.Y.TmSrs.poly##rcv#glmnet
## All.X##rcv#glmnet All.X##rcv#glmnet
## Max.cor.Y.rcv.1X1.cp.0###rpart Max.cor.Y.rcv.1X1.cp.0###rpart
## Interact.High.cor.Y##rcv#glmnet Interact.High.cor.Y##rcv#glmnet
## Max.cor.Y.rcv.1X1###glmnet Max.cor.Y.rcv.1X1###glmnet
## Max.cor.Y.rcv.5X3##rcv#glmnet Max.cor.Y.rcv.5X3##rcv#glmnet
## Max.cor.Y.rcv.5X1##rcv#glmnet Max.cor.Y.rcv.5X1##rcv#glmnet
## Max.cor.Y.rcv.5X5##rcv#glmnet Max.cor.Y.rcv.5X5##rcv#glmnet
## Max.cor.Y.rcv.3X1##rcv#glmnet Max.cor.Y.rcv.3X1##rcv#glmnet
## Max.cor.Y.rcv.3X3##rcv#glmnet Max.cor.Y.rcv.3X3##rcv#glmnet
## Max.cor.Y.rcv.3X5##rcv#glmnet Max.cor.Y.rcv.3X5##rcv#glmnet
## MFO###myMFO_classfr MFO###myMFO_classfr
## Random###myrandom_classfr Random###myrandom_classfr
## max.Accuracy.OOB max.AUCROCR.OOB
## Max.cor.Y##rcv#rpart 0.8200231 0.5892132
## Low.cor.X##rcv#glmnet 0.7783565 0.8052766
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.7754630 0.8049472
## All.X##rcv#glmnet 0.7731481 0.8075492
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.7673611 0.7773858
## Interact.High.cor.Y##rcv#glmnet 0.7656250 0.8140971
## Max.cor.Y.rcv.1X1###glmnet 0.7604167 0.8116126
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.7604167 0.8114863
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.7604167 0.8114863
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.7604167 0.8114863
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.7575231 0.8067975
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.7575231 0.8067975
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.7575231 0.8067975
## MFO###myMFO_classfr 0.1331019 0.5000000
## Random###myrandom_classfr 0.1331019 0.4857956
## max.AUCpROC.OOB max.Accuracy.fit
## Max.cor.Y##rcv#rpart 0.5870523 0.9296422
## Low.cor.X##rcv#glmnet 0.5917252 0.9276303
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.5965780 0.9323484
## All.X##rcv#glmnet 0.5930603 0.9270754
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.6174697 0.9381765
## Interact.High.cor.Y##rcv#glmnet 0.6009259 0.9315850
## Max.cor.Y.rcv.1X1###glmnet 0.5962443 0.9329725
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.5962443 0.9333905
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.5962443 0.9331818
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.5962443 0.9331816
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.5962443 0.9335973
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.5962443 0.9333193
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.5962443 0.9332218
## MFO###myMFO_classfr 0.5000000 0.1796420
## Random###myrandom_classfr 0.5125675 0.1796420
## opt.prob.threshold.fit
## Max.cor.Y##rcv#rpart 0.6
## Low.cor.X##rcv#glmnet 0.2
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.4
## All.X##rcv#glmnet 0.3
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.4
## Interact.High.cor.Y##rcv#glmnet 0.4
## Max.cor.Y.rcv.1X1###glmnet 0.5
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.5
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.5
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.5
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.4
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.4
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.4
## MFO###myMFO_classfr 0.1
## Random###myrandom_classfr 0.1
## opt.prob.threshold.OOB
## Max.cor.Y##rcv#rpart 0.6
## Low.cor.X##rcv#glmnet 0.1
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.1
## All.X##rcv#glmnet 0.1
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.1
## Interact.High.cor.Y##rcv#glmnet 0.1
## Max.cor.Y.rcv.1X1###glmnet 0.1
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.1
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.1
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.1
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.1
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.1
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.1
## MFO###myMFO_classfr 0.1
## Random###myrandom_classfr 0.1
# Plot the ranked comparison table (dsp_models_df) with myplot_radar, which is
# defined outside this chunk.
print(myplot_radar(radar_inp_df = dsp_models_df))
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 15. Consider specifying shapes manually if you must have them.
## Warning: Removed 63 rows containing missing values (geom_point).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 15. Consider specifying shapes manually if you must have them.
# Show the formula returned by get_model_sel_frmla(), i.e. the one passed to
# orderBy() when the models were ranked.
print("Metrics used for model selection:"); print(get_model_sel_frmla())
## [1] "Metrics used for model selection:"
## ~-max.Accuracy.OOB - max.AUCROCR.OOB - max.AUCpROC.OOB - max.Accuracy.fit -
## opt.prob.threshold.OOB
## <environment: 0x7f8a80f446a0>
# The first row of the ranked table is reported as the best model.
print(sprintf("Best model id: %s", dsp_models_df[1, "id"]))
## [1] "Best model id: Max.cor.Y##rcv#rpart"
# Add the predictions of one fitted model to a data frame of observations.
#
# Args:
#   df:                 observations to score; must contain the predictors the
#                       model needs and the response column glb_rsp_var.
#   mdl_id:             id of the model; used to look up the model object in
#                       glb_models_lst and its row in glb_models_df.
#   rsp_var:            response name passed to mygetPredictIds() to build the
#                       output column names.
#                       NOTE(review): the actual response values are read from
#                       the global glb_rsp_var, not from rsp_var; all callers
#                       visible here pass glb_rsp_var -- confirm before passing
#                       anything else.
#   prob_threshold_def: fallback probability threshold (binomial case) when
#                       glb_models_df holds no usable opt.prob.threshold.OOB
#                       for mdl_id.
#   verbose:            when TRUE, print diagnostic plots / the worst rows.
# Returns:
#   df with the columns named by mygetPredictIds() filled in: $value
#   (prediction), $prob (binomial only), $err, $err.abs and $is.acc.
# Raises:
#   stop() when no threshold is available in the binomial case, and always in
#   the (not yet implemented) multinomial case.
glb_get_predictions <- function(df, mdl_id, rsp_var, prob_threshold_def=NULL, verbose=FALSE) {
    mdl <- glb_models_lst[[mdl_id]]

    clmnNames <- mygetPredictIds(rsp_var, mdl_id)
    predct_var_name <- clmnNames$value
    predct_prob_var_name <- clmnNames$prob
    predct_accurate_var_name <- clmnNames$is.acc
    predct_error_var_name <- clmnNames$err
    predct_erabs_var_name <- clmnNames$err.abs

    if (glb_is_regression) {
        df[, predct_var_name] <- predict(mdl, newdata=df, type="raw")
        if (verbose) print(myplot_scatter(df, glb_rsp_var, predct_var_name) +
                  facet_wrap(reformulate(glbFeatsCategory), scales = "free") +
                  stat_smooth(method="glm"))

        # signed error = prediction - actual
        df[, predct_error_var_name] <- df[, predct_var_name] - df[, glb_rsp_var]
        if (verbose) print(myplot_scatter(df, predct_var_name, predct_error_var_name) +
                  #facet_wrap(reformulate(glbFeatsCategory), scales = "free") +
                  stat_smooth(method="auto"))
        if (verbose) print(myplot_scatter(df, glb_rsp_var, predct_error_var_name) +
                  #facet_wrap(reformulate(glbFeatsCategory), scales = "free") +
                  stat_smooth(method="glm"))

        df[, predct_erabs_var_name] <- abs(df[, predct_error_var_name])
        if (verbose) print(head(orderBy(reformulate(c("-", predct_erabs_var_name)), df)))

        df[, predct_accurate_var_name] <- (df[, glb_rsp_var] == df[, predct_var_name])
    }

    if (glb_is_classification && glb_is_binomial) {
        # `[1]` turns "no row for this model id" (a zero-length result) into NA
        # so that the scalar test below is well defined and the default
        # threshold is used, instead of the `if` failing on a zero-length
        # condition.
        prob_threshold <- glb_models_df[glb_models_df$id == mdl_id,
                                        "opt.prob.threshold.OOB"][1]
        if (is.null(prob_threshold) || is.na(prob_threshold)) {
            warning("Using default probability threshold: ", prob_threshold_def)
            if (is.null(prob_threshold <- prob_threshold_def))
                stop("Default probability threshold is NULL")
        }

        rsp_levels <- levels(df[, glb_rsp_var])

        # Probability of the second response level, taken from column 2 of
        # predict(type = "prob")
        df[, predct_prob_var_name] <- predict(mdl, newdata = df, type = "prob")[, 2]
        # prob >= threshold -> second level, otherwise first level
        df[, predct_var_name] <-
            factor(rsp_levels[(df[, predct_prob_var_name] >= prob_threshold) * 1 + 1],
                   rsp_levels)

        df[, predct_error_var_name] <- df[, predct_var_name] != df[, glb_rsp_var]

        # Absolute error = distance of the predicted probability from the
        # actual outcome: 1 when the actual response is the second level
        # (true +ve / false -ve rows), 0 when it is the first level
        # (true -ve / false +ve rows). Rows with a missing response or
        # probability get NA.
        df[, predct_erabs_var_name] <-
            abs((df[, glb_rsp_var] == rsp_levels[2]) - df[, predct_prob_var_name])

        if (verbose) print(head(orderBy(reformulate(c("-", predct_erabs_var_name)), df)))

        df[, predct_accurate_var_name] <- (df[, glb_rsp_var] == df[, predct_var_name])
    }

    if (glb_is_classification && !glb_is_binomial) {
        df[, predct_var_name] <- predict(mdl, newdata = df, type = "raw")
        df[, paste0(predct_var_name, ".prob")] <-
            predict(mdl, newdata = df, type = "prob")
        stop("Multinomial prediction error calculation needs to be implemented...")
    }

    return(df)
}
#stop(here"); glb2Sav(); glbObsAll <- savObsAll; glbObsTrn <- savObsTrn; glbObsFit <- savObsFit; glbObsOOB <- savObsOOB; sav_models_df <- glb_models_df; glb_models_df <- sav_models_df; glb_featsimp_df <- sav_featsimp_df
# Summarise a model's absolute prediction error per category.
#
# Args:
#   obs_df: observations; predictions for mdl_id are added through
#           glb_get_predictions() when the prediction column is missing.
#   mdl_id: id of the model whose predictions are summarised.
#   label:  tag used in the output column names (e.g. "fit").
# Returns:
#   An ungrouped summary with one row per value of glbFeatsCategory and the
#   columns err.abs.<label>.sum, err.abs.<label>.mean and .n.<label>.
myget_category_stats <- function(obs_df, mdl_id, label) {
    require(dplyr)
    require(lazyeval)

    value_col <- mygetPredictIds(glb_rsp_var, mdl_id)$value
    errabs_col <- mygetPredictIds(glb_rsp_var, mdl_id)$err.abs

    # Score the observations first if this model's predictions are absent
    if (!(value_col %in% names(obs_df))) {
        obs_df <- glb_get_predictions(obs_df, mdl_id, glb_rsp_var)
    }

    # Keep category, response, prediction and absolute error; the error column
    # is last and is renamed to carry the label
    labelled_errabs <- paste0("err.abs.", label)
    stats_inp_df <- obs_df[, c(glbFeatsCategory, glb_rsp_var,
                               value_col, errabs_col)]
    names(stats_inp_df)[ncol(stats_inp_df)] <- labelled_errabs

    # Per-category sum, mean and row count of the absolute error
    by_ctgry_df <- dplyr::group_by_(stats_inp_df, glbFeatsCategory)
    ret_ctgry_df <- dplyr::summarise_(by_ctgry_df,
        interp(~sum(var), var = as.name(labelled_errabs)),
        interp(~mean(var), var = as.name(labelled_errabs)),
        interp(~n()))
    names(ret_ctgry_df) <- c(glbFeatsCategory,
                             paste0(labelled_errabs, ".sum"),
                             paste0(labelled_errabs, ".mean"),
                             paste0(".n.", label))

    return(dplyr::ungroup(ret_ctgry_df))
}
#print(colSums((ctgry_df <- myget_category_stats(obs_df=glbObsFit, mdl_id="", label="fit"))[, -grep(glbFeatsCategory, names(ctgry_df))]))
# Optional ensemble stage. When glb_mdl_ensemble is set, the member models are
# scored on the fit and OOB observations and glm / glmnet models are then
# fitted with the members' prediction columns as predictors.
# glb_mdl_ensemble may be:
#   "auto"           - use the models ranked above the first baseline model
#   "%<d-% <R code>" - R code whose value is the vector of model ids
#   otherwise        - a character vector of model ids
if (!is.null(glb_mdl_ensemble)) {
    # The prefix must exist before it is used in the chunk label below
    # (it used to be assigned only after the myadd_chunk() call).
    mdl_id_pfx <- "Ensemble"
    fit.models_2_chunk_df <- myadd_chunk(fit.models_2_chunk_df,
                            paste0("fit.models_2_", mdl_id_pfx), major.inc = TRUE,
                                                label.minor = "ensemble")

    # (regression is allowed; only multinomial classification is unsupported)
    if (glb_is_classification && !glb_is_binomial)
        stop("Ensemble models not implemented yet for multinomial classification")

    # Ids of all non-ensemble models that rank above the first baseline
    # (MFO / Random / Baseline) model under the selection formula.
    mygetEnsembleAutoMdlIds <- function() {
        tmp_models_df <- orderBy(get_model_sel_frmla(), glb_models_df)
        row.names(tmp_models_df) <- tmp_models_df$id
        is_baseline <- grepl("MFO|Random|Baseline", tmp_models_df$id)
        # No baseline model at all -> keep every model; baseline ranked first
        # -> keep none (seq_len(0) is empty, unlike 1:0)
        mdl_threshold_pos <- if (any(is_baseline))
            which(is_baseline)[1] - 1 else nrow(tmp_models_df)
        mdlIds <- tmp_models_df$id[seq_len(mdl_threshold_pos)]
        return(mdlIds[!grepl("Ensemble", mdlIds)])
    }

    # The keyword forms are single strings; a vector of model ids must not be
    # compared inside if()
    if ((length(glb_mdl_ensemble) == 1) && (glb_mdl_ensemble == "auto")) {
        glb_mdl_ensemble <- mygetEnsembleAutoMdlIds()
        mdl_id_pfx <- paste0(mdl_id_pfx, ".auto")
    } else if ((length(glb_mdl_ensemble) == 1) &&
               grepl("^%<d-%", glb_mdl_ensemble)) {
        # NOTE(review): this evaluates R code embedded in the setting; keep
        # glb_mdl_ensemble under the analyst's control, never external input.
        glb_mdl_ensemble <- eval(parse(text =
                        str_trim(unlist(strsplit(glb_mdl_ensemble, "%<d-%"))[2])))
    }

    # Add every available member model's predictions to the fit / OOB data
    for (mdl_id in glb_mdl_ensemble) {
        if (!(mdl_id %in% names(glb_models_lst))) {
            warning("Model ", mdl_id, " in glb_mdl_ensemble not found !")
            next
        }
        glbObsFit <- glb_get_predictions(df = glbObsFit, mdl_id, glb_rsp_var)
        glbObsOOB <- glb_get_predictions(df = glbObsOOB, mdl_id, glb_rsp_var)
    }

#mdl_id_pfx <- "Ensemble.RFE"; mdlId <- paste0(mdl_id_pfx, ".glmnet")
#glb_mdl_ensemble <- gsub(mygetPredictIds$value, "", grep("RFE\\.X\\.(?!Interact)", row.names(glb_featsimp_df), perl = TRUE, value = TRUE), fixed = TRUE)
#varImp(glb_models_lst[[mdlId]])

#cor_df <- data.frame(cor=cor(glbObsFit[, glb_rsp_var], glbObsFit[, paste(mygetPredictIds$value, glb_mdl_ensemble)], use="pairwise.complete.obs"))
#glbObsFit <- glb_get_predictions(df=glbObsFit, "Ensemble.glmnet", glb_rsp_var);print(colSums((ctgry_df <- myget_category_stats(obs_df=glbObsFit, mdl_id="Ensemble.glmnet", label="fit"))[, -grep(glbFeatsCategory, names(ctgry_df))]))

    ### bid0_sp
    #  Better than MFO; models.n=28; min.RMSE.fit=0.0521233; err.abs.fit.sum=7.3631895
    #  old: Top x from auto; models.n= 5; min.RMSE.fit=0.06311047; err.abs.fit.sum=9.5937080
    #  RFE only ;       models.n=16; min.RMSE.fit=0.05148588; err.abs.fit.sum=7.2875091
    #  RFE subset only ;models.n= 5; min.RMSE.fit=0.06040702; err.abs.fit.sum=9.059088
    #  RFE subset only ;models.n= 9; min.RMSE.fit=0.05933167; err.abs.fit.sum=8.7421288
    #  RFE subset only ;models.n=15; min.RMSE.fit=0.0584607; err.abs.fit.sum=8.5902066
    #  RFE subset only ;models.n=17; min.RMSE.fit=0.05496899; err.abs.fit.sum=8.0170431
    #  RFE subset only ;models.n=18; min.RMSE.fit=0.05441577; err.abs.fit.sum=7.837223
    #  RFE subset only ;models.n=16; min.RMSE.fit=0.05441577; err.abs.fit.sum=7.837223
    ### bid0_sp
    ### bid1_sp
    # "auto"; err.abs.fit.sum=76.699774; min.RMSE.fit=0.2186429
    # "RFE.X.*"; err.abs.fit.sum=; min.RMSE.fit=0.221114
    ### bid1_sp

    # Predictors of the ensemble = the members' prediction columns
    # (their ".prob" columns for classification)
    indep_vars <- paste0(mygetPredictIds(glb_rsp_var)$value, glb_mdl_ensemble)
    if (glb_is_classification)
        indep_vars <- paste0(indep_vars, ".prob")
    # Some models in glb_mdl_ensemble might not be fitted e.g. RFE.X.Interact
    indep_vars <- intersect(indep_vars, names(glbObsFit))

#     indep_vars <- grep(mygetPredictIds(glb_rsp_var)$value, names(glbObsFit), fixed=TRUE, value=TRUE)
#     if (glb_is_regression)
#         indep_vars <- indep_vars[!grepl("(err\\.abs|accurate)$", indep_vars)]
#     if (glb_is_classification && glb_is_binomial)
#         indep_vars <- grep("prob$", indep_vars, value=TRUE) else
#         indep_vars <- indep_vars[!grepl("err$", indep_vars)]

    #rfe_fit_ens_results <- myrun_rfe(glbObsFit, indep_vars)

    # One ensemble model per (method, resampling scheme) pair
    for (method in c("glm", "glmnet")) {
        for (trainControlMethod in
             c("boot", "boot632", "cv", "repeatedcv"
               #, "LOOCV" # tuneLength * nrow(fitDF)
               , "LGOCV", "adaptive_cv"
               #, "adaptive_boot"  #error: adaptive$min should be less than 3
               #, "adaptive_LGOCV" #error: adaptive$min should be less than 3
               )) {
            #sav_models_df <- glb_models_df; all.equal(sav_models_df, glb_models_df)
            #glb_models_df <- sav_models_df; print(glb_models_df$id)

            if ((method == "glm") && (trainControlMethod != "repeatedcv"))
                # glm used only to identify outliers
                next

            ret_lst <- myfit_mdl(
                mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
                    id.prefix = paste0(mdl_id_pfx, ".", trainControlMethod),
                    type = glb_model_type, tune.df = NULL,
                    trainControl.method = trainControlMethod,
                    trainControl.number = glb_rcv_n_folds,
                    trainControl.repeats = glb_rcv_n_repeats,
                    trainControl.classProbs = glb_is_classification,
                    trainControl.summaryFunction = glbMdlMetricSummaryFn,
                    train.metric = glbMdlMetricSummary,
                    train.maximize = glbMdlMetricMaximize,
                    train.method = method)),
                indep_vars = indep_vars, rsp_var = glb_rsp_var,
                fit_df = glbObsFit, OOB_df = glbObsOOB)
        }
    }
    # Refresh the ranked comparison table so it includes the ensemble models
    dsp_models_df <- get_dsp_models_df()
}
# Resolve the model used for all downstream diagnostics: default to the id in
# the first row of dsp_models_df unless the user pinned glb_sel_mdl_id.
if (is.null(glb_sel_mdl_id)) {
    glb_sel_mdl_id <- dsp_models_df[1, "id"]
} else {
    print(sprintf("User specified selection: %s", glb_sel_mdl_id))
}
## [1] "User specified selection: All.X##rcv#glmnet"
# Assign before printing: an assignment written inside a call argument only
# happens if the callee forces that (lazily evaluated) argument.
glb_sel_mdl <- glb_models_lst[[glb_sel_mdl_id]]
# Fail fast on an unknown id: `[[` on a list yields NULL for a missing name,
# which would otherwise surface as an obscure error further downstream.
if (is.null(glb_sel_mdl)) {
    stop(sprintf("Selected model id not found in glb_models_lst: %s",
                 glb_sel_mdl_id), call. = FALSE)
}
myprint_mdl(glb_sel_mdl)
## Length Class Mode
## a0 100 -none- numeric
## beta 26700 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 267 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -3.9128975142
## NDSSName.my.fctr#Opinion#RoomForDebate
## -0.0993818427
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 0.6780570283
## NDSSName.my.fctr#U.S.#Education
## -0.0538608422
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 0.6629269084
## NDSSName.my.fctrBusiness#Technology#
## 0.0167402411
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.0403659374
## NDSSName.my.fctrOpEd#Opinion#
## 0.7839563477
## NDSSName.my.fctrScience#Health#
## 0.8268022814
## NDSSName.my.fctrStyles##Fashion
## -0.0358394378
## NDSSName.my.fctrStyles#U.S.#
## 0.6505903792
## NDSSName.my.fctrTStyle##
## -0.0944336014
## PubDate.day.minutes.poly.1
## 6.1968659740
## PubDate.day.minutes.poly.2
## 2.7526995782
## PubDate.day.minutes.poly.4
## 1.0149860620
## PubDate.hour.fctr(15.3,23]
## 0.0158660084
## PubDate.last2.log1p
## 0.0049039456
## PubDate.last4.log1p
## 0.0020941831
## PubDate.wkend
## 0.1019986663
## WordCount.log1p
## 0.1377845090
## WordCount.root2
## 0.0237638430
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg
## 1.1107203605
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg
## -4.0365793853
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg
## 1.3719600079
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg
## 4.5046664811
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg
## 0.5423083981
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg
## 0.1741186123
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg
## 1.8785299901
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg
## -0.0058299785
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg
## 0.0423465436
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg
## -0.0029370848
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg
## 0.0464038721
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg
## -0.0027331891
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg
## 0.0460673328
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg
## 0.0311476654
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg
## -0.0007315801
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg
## 0.0237102812
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg
## -0.0063533780
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg
## -0.0085878059
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg
## 0.0315807539
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg
## -0.0047405213
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg
## 0.0650963217
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg
## 0.0083187966
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg
## -0.0038312213
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg
## 0.0671501716
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg
## 0.0531800939
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg
## -0.0015350453
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg
## 0.0595488082
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg
## -0.0091683893
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg
## -0.0057844214
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg
## 0.0428941930
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg
## -0.0016995856
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg
## 0.0400648704
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg
## -0.0003591814
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg
## 0.0398657040
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg
## 0.0302335606
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg
## 0.0209878355
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg
## -0.0052426821
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg
## -0.0079293622
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg
## 0.0335177936
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg
## -0.0045073437
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg
## 0.0608649650
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg
## 0.0046654736
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg
## -0.0037544961
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg
## 0.0592756628
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg
## 0.0399694782
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg
## -0.0013542140
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg
## 0.0532508977
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg
## -0.0079277443
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg
## -0.0074876245
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg
## 0.0398496916
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg
## -0.0036507906
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg
## 0.0466991152
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg
## 0.0028638844
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg
## -0.0036938119
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg
## 0.0529339275
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg
## 0.0380275686
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg
## -0.0014099131
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg
## 0.0358835221
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg
## -0.0073495332
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -4.049609531
## NDSSName.my.fctr#Opinion#RoomForDebate
## -0.121989095
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 0.704262140
## NDSSName.my.fctr#U.S.#Education
## -0.063257524
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 0.677002877
## NDSSName.my.fctrBusiness#Technology#
## 0.028315897
## NDSSName.my.fctrForeign#World#AsiaPacific
## -0.050716705
## NDSSName.my.fctrOpEd#Opinion#
## 0.807935142
## NDSSName.my.fctrScience#Health#
## 0.862376733
## NDSSName.my.fctrStyles##Fashion
## -0.049433468
## NDSSName.my.fctrStyles#U.S.#
## 0.676296513
## NDSSName.my.fctrTStyle##
## -0.101555851
## PubDate.day.minutes.poly.1
## 6.611758257
## PubDate.day.minutes.poly.2
## 3.208695999
## PubDate.day.minutes.poly.4
## 1.326220683
## PubDate.hour.fctr(15.3,23]
## 0.020102410
## PubDate.last2.log1p
## 0.007238448
## PubDate.last4.log1p
## 0.004415541
## PubDate.wkday.fctr5
## -0.003012328
## PubDate.wkend
## 0.115562490
## WordCount.log1p
## 0.145706770
## WordCount.root2
## 0.025132374
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg
## 1.376206087
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg
## -5.504099804
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg
## 1.588832251
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg
## 5.291960917
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg
## 0.815535466
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg
## 0.465223110
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.5.ctg
## 1.318496131
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg
## 2.817439917
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg
## -0.007521570
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg
## 0.043782452
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg
## -0.003638768
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg
## 0.047023523
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg
## -0.003570344
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg
## 0.046510695
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg
## 0.031064783
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg
## -0.001725633
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg
## 0.023208838
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg
## -0.006858960
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg
## -0.010783785
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg
## 0.032428051
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg
## -0.005690303
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg
## 0.066411284
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg
## 0.009998689
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg
## -0.004894916
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg
## 0.068521976
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg
## 0.054480105
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg
## -0.002798763
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg
## 0.061667821
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg
## -0.009898408
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg
## -0.007409073
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg
## 0.044412471
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg
## -0.002351539
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg
## 0.040393133
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg
## -0.001082352
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg
## 0.040033297
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg
## 0.030162125
## NDSSName.my.fctrStyles##Fashion:PubDate.last32.log1p.ctg
## -0.000410508
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg
## 0.020440278
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg
## -0.005660356
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg
## -0.009939087
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg
## 0.034448920
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg
## -0.005376182
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg
## 0.062117169
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg
## 0.005947848
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg
## -0.004732169
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg
## 0.060222943
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg
## 0.040229359
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg
## -0.002471088
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg
## 0.054940484
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg
## -0.008547698
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg
## -0.009359864
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg
## 0.041065851
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg
## -0.004400494
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg
## 0.047084797
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg
## 0.003959960
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg
## -0.004625690
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg
## 0.053705399
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg
## 0.038354862
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg
## -0.002455757
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg
## 0.036241983
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg
## -0.007927033
## [1] TRUE
# TODO: everything from here down to save() should be factored into a single
# function: the same sequence of steps is executed twice more, in the
# fit.data.training and predict.data.new chunks.
# Header line for the diagnostics on the fit observations (glbObsFit).
print(sprintf("%s fit prediction diagnostics:", glb_sel_mdl_id))
## [1] "All.X##rcv#glmnet fit prediction diagnostics:"
# glbObsFit is replaced by the value glb_get_predictions() returns for the
# selected model -- presumably the same observations with prediction columns
# added; TODO confirm against the helper's definition.
glbObsFit <- glb_get_predictions(df = glbObsFit, mdl_id = glb_sel_mdl_id,
rsp_var = glb_rsp_var)
# Same step for the OOB observations (glbObsOOB).
print(sprintf("%s OOB prediction diagnostics:", glb_sel_mdl_id))
## [1] "All.X##rcv#glmnet OOB prediction diagnostics:"
glbObsOOB <- glb_get_predictions(df = glbObsOOB, mdl_id = glb_sel_mdl_id,
rsp_var = glb_rsp_var)
# Feature importance for the selected model. featsimp_df = NULL is passed --
# presumably this starts a new table rather than extending an existing one;
# confirm against myget_feats_importance().
glb_featsimp_df <-
myget_feats_importance(mdl=glb_sel_mdl, featsimp_df=NULL)
# Duplicate the `imp` column under a model-specific name, "<glb_sel_mdl_id>.imp".
glb_featsimp_df[, paste0(glb_sel_mdl_id, ".imp")] <- glb_featsimp_df$imp
# The next three lines are disabled scratch code, kept as-is.
#mdl_id <-"RFE.X.glmnet"; glb_featsimp_df <- myget_feats_importance(glb_models_lst[[mdl_id]], glb_featsimp_df); glb_featsimp_df[, paste0(mdl_id, ".imp")] <- glb_featsimp_df$imp; print(glb_featsimp_df)
#print(head(sbst_featsimp_df <- subset(glb_featsimp_df, is.na(RFE.X.glmnet.imp) | (abs(RFE.X.YeoJohnson.glmnet.imp - RFE.X.glmnet.imp) > 0.0001), select=-imp)))
#print(orderBy(~ -cor.y.abs, subset(glb_feats_df, id %in% c(row.names(sbst_featsimp_df), "startprice.dcm1.is9", "D.weight.post.stop.sum"))))
print(glb_featsimp_df)
## imp
## PubDate.day.minutes.poly.1 100.00000
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg 88.15594
## PubDate.day.minutes.poly.2 70.97396
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg 66.84933
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg 57.58351
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg 55.69412
## PubDate.day.minutes.poly.4 55.19343
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.5.ctg 53.46899
## NDSSName.my.fctrScience#Health# 51.70030
## NDSSName.my.fctrOpEd#Opinion# 51.25612
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg 50.91035
## NDSSName.my.fctr#Opinion#ThePublicEditor 50.37025
## NDSSName.my.fctrBusiness#Crosswords/Games# 50.15826
## NDSSName.my.fctrStyles#U.S.# 50.13310
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg 47.89992
## WordCount.log1p 45.64733
## PubDate.wkend 45.38153
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg 45.00131
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg 44.98345
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg 44.94701
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg 44.94176
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg 44.93139
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg 44.88522
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg 44.88194
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg 44.87622
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg 44.82052
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg 44.81961
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg 44.81554
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg 44.79591
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg 44.79069
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg 44.76793
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg 44.76367
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg 44.76239
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg 44.76087
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg 44.74633
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg 44.72830
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg 44.71210
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg 44.69504
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg 44.68497
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg 44.67727
## NDSSName.my.fctrBusiness#Technology# 44.64238
## WordCount.root2 44.63210
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg 44.61881
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg 44.59532
## PubDate.hour.fctr(15.3,23] 44.58457
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg 44.50280
## PubDate.last2.log1p 44.47824
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg 44.46899
## PubDate.last4.log1p 44.45424
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg 44.45238
## .rnorm 44.42049
## NDSSName.my.fctr#Multimedia# 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness 44.42049
## NDSSName.my.fctrCulture## 44.42049
## NDSSName.my.fctrCulture#Arts# 44.42049
## NDSSName.my.fctrForeign#World# 44.42049
## NDSSName.my.fctrMetro#N.Y./Region# 44.42049
## NDSSName.my.fctrTravel#Travel# 44.42049
## NDSSName.my.fctrmyOther 44.42049
## PubDate.date.fctr(7,13] 44.42049
## PubDate.date.fctr(13,19] 44.42049
## PubDate.date.fctr(19,25] 44.42049
## PubDate.date.fctr(25,31] 44.42049
## PubDate.day.minutes.poly.3 44.42049
## PubDate.day.minutes.poly.5 44.42049
## PubDate.hour.fctr(7.67,15.3] 44.42049
## PubDate.juliandate 44.42049
## PubDate.last16.log1p 44.42049
## PubDate.last32.log1p 44.42049
## PubDate.last8.log1p 44.42049
## PubDate.minute.fctr(14.8,29.5] 44.42049
## PubDate.minute.fctr(29.5,44.2] 44.42049
## PubDate.minute.fctr(44.2,59.1] 44.42049
## PubDate.month.fctr10 44.42049
## PubDate.month.fctr11 44.42049
## PubDate.month.fctr12 44.42049
## PubDate.second.fctr(14.8,29.5] 44.42049
## PubDate.second.fctr(29.5,44.2] 44.42049
## PubDate.second.fctr(44.2,59.1] 44.42049
## PubDate.wkday.fctr1 44.42049
## PubDate.wkday.fctr2 44.42049
## PubDate.wkday.fctr3 44.42049
## PubDate.wkday.fctr4 44.42049
## PubDate.wkday.fctr6 44.42049
## WordCount.nexp 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.last32.log1p.ctg 44.41767
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg 44.41247
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg 44.40744
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg 44.40155
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg 44.40131
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg 44.40130
## PubDate.wkday.fctr5 44.39981
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg 44.39875
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg 44.39148
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg 44.39068
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg 44.38427
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg 44.38266
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg 44.38183
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg 44.38058
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg 44.37617
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg 44.37363
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg 44.37301
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg 44.36295
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg 44.36011
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg 44.35927
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg 44.35398
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg 44.34877
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg 44.34392
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg 44.33922
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg 44.33746
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg 44.33234
## NDSSName.my.fctrStyles##Fashion 44.02222
## NDSSName.my.fctrForeign#World#AsiaPacific 44.00596
## NDSSName.my.fctr#U.S.#Education 43.89767
## NDSSName.my.fctrTStyle## 43.56802
## NDSSName.my.fctr#Opinion#RoomForDebate 43.41965
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg 0.00000
## All.X##rcv#glmnet.imp
## PubDate.day.minutes.poly.1 100.00000
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg 88.15594
## PubDate.day.minutes.poly.2 70.97396
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg 66.84933
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg 57.58351
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg 55.69412
## PubDate.day.minutes.poly.4 55.19343
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.5.ctg 53.46899
## NDSSName.my.fctrScience#Health# 51.70030
## NDSSName.my.fctrOpEd#Opinion# 51.25612
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg 50.91035
## NDSSName.my.fctr#Opinion#ThePublicEditor 50.37025
## NDSSName.my.fctrBusiness#Crosswords/Games# 50.15826
## NDSSName.my.fctrStyles#U.S.# 50.13310
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg 47.89992
## WordCount.log1p 45.64733
## PubDate.wkend 45.38153
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg 45.00131
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg 44.98345
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg 44.94701
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg 44.94176
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg 44.93139
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg 44.88522
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg 44.88194
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg 44.87622
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg 44.82052
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg 44.81961
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg 44.81554
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg 44.79591
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg 44.79069
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg 44.76793
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg 44.76367
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg 44.76239
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg 44.76087
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg 44.74633
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg 44.72830
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg 44.71210
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg 44.69504
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg 44.68497
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg 44.67727
## NDSSName.my.fctrBusiness#Technology# 44.64238
## WordCount.root2 44.63210
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg 44.61881
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg 44.59532
## PubDate.hour.fctr(15.3,23] 44.58457
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg 44.50280
## PubDate.last2.log1p 44.47824
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg 44.46899
## PubDate.last4.log1p 44.45424
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg 44.45238
## .rnorm 44.42049
## NDSSName.my.fctr#Multimedia# 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness 44.42049
## NDSSName.my.fctrCulture## 44.42049
## NDSSName.my.fctrCulture#Arts# 44.42049
## NDSSName.my.fctrForeign#World# 44.42049
## NDSSName.my.fctrMetro#N.Y./Region# 44.42049
## NDSSName.my.fctrTravel#Travel# 44.42049
## NDSSName.my.fctrmyOther 44.42049
## PubDate.date.fctr(7,13] 44.42049
## PubDate.date.fctr(13,19] 44.42049
## PubDate.date.fctr(19,25] 44.42049
## PubDate.date.fctr(25,31] 44.42049
## PubDate.day.minutes.poly.3 44.42049
## PubDate.day.minutes.poly.5 44.42049
## PubDate.hour.fctr(7.67,15.3] 44.42049
## PubDate.juliandate 44.42049
## PubDate.last16.log1p 44.42049
## PubDate.last32.log1p 44.42049
## PubDate.last8.log1p 44.42049
## PubDate.minute.fctr(14.8,29.5] 44.42049
## PubDate.minute.fctr(29.5,44.2] 44.42049
## PubDate.minute.fctr(44.2,59.1] 44.42049
## PubDate.month.fctr10 44.42049
## PubDate.month.fctr11 44.42049
## PubDate.month.fctr12 44.42049
## PubDate.second.fctr(14.8,29.5] 44.42049
## PubDate.second.fctr(29.5,44.2] 44.42049
## PubDate.second.fctr(44.2,59.1] 44.42049
## PubDate.wkday.fctr1 44.42049
## PubDate.wkday.fctr2 44.42049
## PubDate.wkday.fctr3 44.42049
## PubDate.wkday.fctr4 44.42049
## PubDate.wkday.fctr6 44.42049
## WordCount.nexp 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.last32.log1p.ctg 44.41767
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg 44.41247
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg 44.40744
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg 44.40155
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg 44.40131
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg 44.40130
## PubDate.wkday.fctr5 44.39981
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg 44.39875
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg 44.39148
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg 44.39068
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg 44.38427
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg 44.38266
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg 44.38183
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg 44.38058
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg 44.37617
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg 44.37363
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg 44.37301
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg 44.36295
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg 44.36011
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg 44.35927
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg 44.35398
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg 44.34877
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg 44.34392
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg 44.33922
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg 44.33746
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg 44.33234
## NDSSName.my.fctrStyles##Fashion 44.02222
## NDSSName.my.fctrForeign#World#AsiaPacific 44.00596
## NDSSName.my.fctr#U.S.#Education 43.89767
## NDSSName.my.fctrTStyle## 43.56802
## NDSSName.my.fctr#Opinion#RoomForDebate 43.41965
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg 0.00000
# Used again in fit.data.training & predict.data.new chunks
#
# glb_analytics_diag_plots: print diagnostic plots for one model's predictions.
#
# Args:
#   obs_df:         data frame of observations; for each top feature it is
#                   melted with glb_rsp_var and the model's prediction column
#                   as the measure variables.
#   mdl_id:         model id handed to mygetPredictIds() to find the name of
#                   the prediction column.
#   prob_threshold: forwarded to myplot_prediction_classification(); not used
#                   in the regression branch.
#
# Reads the globals glb_featsimp_df, glb_rsp_var, glb_id_var,
# glb_is_regression and glb_is_classification. Called for its side effects
# (printed plots and warnings).
glb_analytics_diag_plots <- function(obs_df, mdl_id, prob_threshold=NULL) {
    # Distinct feature names among the most important terms, best first.
    # Several (feat, feat.interact) rows can share one `feat`; plotting per row
    # would print the same scatter plot repeatedly.
    top_feats <- character(0)

    if (!is.null(featsimp_df <- glb_featsimp_df)) {
        # Row names are model terms; drop surrounding backticks, then split
        # "main:interaction" terms into their two parts.
        featsimp_df$feat <- gsub("`(.*?)`", "\\1", row.names(featsimp_df))
        featsimp_df$feat.interact <- gsub("(.*?):(.*)", "\\2", featsimp_df$feat)
        featsimp_df$feat <- gsub("(.*?):(.*)", "\\1", featsimp_df$feat)
        # A term without ":" is returned unchanged by both gsub() calls, so an
        # interaction part equal to the main part is treated as "none".
        featsimp_df$feat.interact <-
            ifelse(featsimp_df$feat.interact == featsimp_df$feat,
                   NA, featsimp_df$feat.interact)
        # Strip whatever follows ".fctr" so all levels of a factor share a name
        featsimp_df$feat <-
            gsub("(.*?)\\.fctr(.*)", "\\1\\.fctr", featsimp_df$feat)
        featsimp_df$feat.interact <-
            gsub("(.*?)\\.fctr(.*)", "\\1\\.fctr", featsimp_df$feat.interact)
        # Max importance per (feat, feat.interact), sorted by decreasing imp.max
        # (summaryBy()/orderBy() are presumably doBy's -- defined outside this chunk)
        featsimp_df <- orderBy(~ -imp.max,
            summaryBy(imp ~ feat + feat.interact, data=featsimp_df,
                      FUN=max))
        #rex_str=":(.*)"; txt_vctr=tail(featsimp_df$feat); ret_lst <- regexec(rex_str, txt_vctr); ret_lst <- regmatches(txt_vctr, ret_lst); ret_vctr <- sapply(1:length(ret_lst), function(pos_ix) ifelse(length(ret_lst[[pos_ix]]) > 0, ret_lst[[pos_ix]], "")); print(ret_vctr <- ret_vctr[ret_vctr != ""])
        featsimp_df <- subset(featsimp_df, !is.na(imp.max))
        if (nrow(featsimp_df) > 5) {
            warning("Limiting important feature scatter plots to 5 out of ",
                    nrow(featsimp_df))
            featsimp_df <- head(featsimp_df, 5)
        }

        #       if (!all(is.na(featsimp_df$feat.interact)))
        #           stop("not implemented yet")
        rsp_var_out <- mygetPredictIds(glb_rsp_var, mdl_id)$value
        top_feats <- unique(as.character(featsimp_df$feat))
        # One scatter plot per distinct feature: actual vs. predicted response
        for (var in top_feats) {
            plot_df <- melt(obs_df, id.vars = var,
                            measure.vars = c(glb_rsp_var, rsp_var_out))
            print(myplot_scatter(plot_df, var, "value", colorcol_name = "variable",
                                 facet_colcol_name = "variable", jitter = TRUE) +
                      # "none" is the non-deprecated spelling of FALSE here
                      guides(color = "none"))
        }
    }

    no_feats <- is.null(featsimp_df) || (nrow(featsimp_df) == 0)
    # Axes of the prediction plot: best feature on y, second-best *distinct*
    # feature on x; fall back to ".rownames" when only one feature is left.
    feat_x <- if (length(top_feats) > 1) top_feats[2] else ".rownames"
    feat_y <- if (length(top_feats) > 0) top_feats[1] else NULL

    if (glb_is_regression) {
        if (no_feats)
            warning("No important features in glb_fin_mdl") else
            print(myplot_prediction_regression(df=obs_df,
                        feat_x=feat_x,
                        feat_y=feat_y,
                        rsp_var=glb_rsp_var, rsp_var_out=rsp_var_out,
                        id_vars=glb_id_var)
    #               + facet_wrap(reformulate(featsimp_df$feat[2])) # if [1 or 2] is a factor
    #               + geom_point(aes_string(color="<col_name>.fctr")) #  to color the plot
                  )
    }

    if (glb_is_classification) {
        if (no_feats)
            warning("No features in selected model are statistically important")
        else print(myplot_prediction_classification(df = obs_df,
                                feat_x = feat_x,
                                feat_y = feat_y,
                                rsp_var = glb_rsp_var,
                                rsp_var_out = rsp_var_out,
                                id_vars = glb_id_var,
                                prob_threshold = prob_threshold))
    }
}
# Diagnostic plots for the selected model on the OOB observations; binomial
# classification additionally passes that model's OOB probability threshold.
if (glb_is_classification && glb_is_binomial) {
    glb_analytics_diag_plots(obs_df = glbObsOOB, mdl_id = glb_sel_mdl_id,
        prob_threshold = glb_models_df[glb_models_df$id == glb_sel_mdl_id,
                                       "opt.prob.threshold.OOB"])
} else {
    glb_analytics_diag_plots(obs_df = glbObsOOB, mdl_id = glb_sel_mdl_id)
}
## Warning in glb_analytics_diag_plots(obs_df = glbObsOOB, mdl_id =
## glb_sel_mdl_id, : Limiting important feature scatter plots to 5 out of 33
## [1] "Min/Max Boundaries: "
## UniqueID Popular.fctr Popular.fctr.All.X..rcv.glmnet.prob
## 1 2555 N 0.01951631
## 2 302 N 0.16482880
## Popular.fctr.All.X..rcv.glmnet Popular.fctr.All.X..rcv.glmnet.err
## 1 N FALSE
## 2 Y TRUE
## Popular.fctr.All.X..rcv.glmnet.err.abs
## 1 0.01951631
## 2 0.16482880
## Popular.fctr.All.X..rcv.glmnet.is.acc
## 1 TRUE
## 2 FALSE
## Popular.fctr.All.X..rcv.glmnet.accurate
## 1 TRUE
## 2 FALSE
## Popular.fctr.All.X..rcv.glmnet.error .label
## 1 0.0000000 2555
## 2 0.0648288 302
## [1] "Inaccurate: "
## UniqueID Popular.fctr Popular.fctr.All.X..rcv.glmnet.prob
## 1 4775 Y 0.04759226
## 2 4020 Y 0.04857241
## 3 172 Y 0.04931149
## 4 6354 Y 0.04967086
## 5 3554 Y 0.05093578
## 6 4745 Y 0.05247410
## Popular.fctr.All.X..rcv.glmnet Popular.fctr.All.X..rcv.glmnet.err
## 1 N TRUE
## 2 N TRUE
## 3 N TRUE
## 4 N TRUE
## 5 N TRUE
## 6 N TRUE
## Popular.fctr.All.X..rcv.glmnet.err.abs
## 1 0.9524077
## 2 0.9514276
## 3 0.9506885
## 4 0.9503291
## 5 0.9490642
## 6 0.9475259
## Popular.fctr.All.X..rcv.glmnet.is.acc
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## Popular.fctr.All.X..rcv.glmnet.accurate
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## Popular.fctr.All.X..rcv.glmnet.error
## 1 -0.05240774
## 2 -0.05142759
## 3 -0.05068851
## 4 -0.05032914
## 5 -0.04906422
## 6 -0.04752590
## UniqueID Popular.fctr Popular.fctr.All.X..rcv.glmnet.prob
## 94 233 N 0.1037907
## 201 5705 N 0.1298935
## 219 3604 N 0.1367940
## 239 5422 N 0.1508383
## 243 11 N 0.1583395
## 384 5968 N 0.8593176
## Popular.fctr.All.X..rcv.glmnet Popular.fctr.All.X..rcv.glmnet.err
## 94 Y TRUE
## 201 Y TRUE
## 219 Y TRUE
## 239 Y TRUE
## 243 Y TRUE
## 384 Y TRUE
## Popular.fctr.All.X..rcv.glmnet.err.abs
## 94 0.1037907
## 201 0.1298935
## 219 0.1367940
## 239 0.1508383
## 243 0.1583395
## 384 0.8593176
## Popular.fctr.All.X..rcv.glmnet.is.acc
## 94 FALSE
## 201 FALSE
## 219 FALSE
## 239 FALSE
## 243 FALSE
## 384 FALSE
## Popular.fctr.All.X..rcv.glmnet.accurate
## 94 FALSE
## 201 FALSE
## 219 FALSE
## 239 FALSE
## 243 FALSE
## 384 FALSE
## Popular.fctr.All.X..rcv.glmnet.error
## 94 0.00379068
## 201 0.02989346
## 219 0.03679403
## 239 0.05083828
## 243 0.05833951
## 384 0.75931757
## UniqueID Popular.fctr Popular.fctr.All.X..rcv.glmnet.prob
## 387 59 N 0.8683420
## 388 6479 N 0.8739690
## 389 6235 N 0.8885483
## 390 2995 N 0.8885720
## 391 4943 N 0.8953296
## 392 3590 N 0.9333991
## Popular.fctr.All.X..rcv.glmnet Popular.fctr.All.X..rcv.glmnet.err
## 387 Y TRUE
## 388 Y TRUE
## 389 Y TRUE
## 390 Y TRUE
## 391 Y TRUE
## 392 Y TRUE
## Popular.fctr.All.X..rcv.glmnet.err.abs
## 387 0.8683420
## 388 0.8739690
## 389 0.8885483
## 390 0.8885720
## 391 0.8953296
## 392 0.9333991
## Popular.fctr.All.X..rcv.glmnet.is.acc
## 387 FALSE
## 388 FALSE
## 389 FALSE
## 390 FALSE
## 391 FALSE
## 392 FALSE
## Popular.fctr.All.X..rcv.glmnet.accurate
## 387 FALSE
## 388 FALSE
## 389 FALSE
## 390 FALSE
## 391 FALSE
## 392 FALSE
## Popular.fctr.All.X..rcv.glmnet.error
## 387 0.7683420
## 388 0.7739690
## 389 0.7885483
## 390 0.7885720
## 391 0.7953296
## 392 0.8333991
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
# Per-category error summary; skipped when no category feature is configured
if (!is.null(glbFeatsCategory)) {
    # Attach the selected model's stats on the fit observations, joined on
    # the category feature and keeping unmatched rows from both sides.
    glbLvlCategory <- merge(
        glbLvlCategory,
        myget_category_stats(obs_df = glbObsFit, mdl_id = glb_sel_mdl_id,
                             label = "fit"),
        by = glbFeatsCategory, all = TRUE
    )
    row.names(glbLvlCategory) <- glbLvlCategory[, glbFeatsCategory]

    # Same for the OOB observations. No `by` is given, so merge() joins on
    # every column the two frames share.
    #by=glbFeatsCategory, all=TRUE) glb_ctgry-df already contains .n.OOB ?
    glbLvlCategory <- merge(
        glbLvlCategory,
        myget_category_stats(obs_df = glbObsOOB, mdl_id = glb_sel_mdl_id,
                             label = "OOB"),
        all = TRUE
    )
    row.names(glbLvlCategory) <- glbLvlCategory[, glbFeatsCategory]

    # Show the categories sorted by decreasing mean absolute error: OOB error
    # when any evaluation metric mentions "OOB", fit error otherwise.
    if (any(grepl("OOB", glbMdlMetricsEval))) {
        print(orderBy(~-err.abs.OOB.mean, glbLvlCategory))
    } else {
        print(orderBy(~-err.abs.fit.mean, glbLvlCategory))
    }

    # Column totals over everything except the category column(s)
    print(colSums(glbLvlCategory[, -grep(glbFeatsCategory, names(glbLvlCategory))]))
}
## NDSSName.my.fctr
## #Opinion#ThePublicEditor #Opinion#ThePublicEditor
## OpEd#Opinion# OpEd#Opinion#
## Styles#U.S.# Styles#U.S.#
## Science#Health# Science#Health#
## Business#Crosswords/Games# Business#Crosswords/Games#
## Business#Technology# Business#Technology#
## ## ##
## Business#BusinessDay#Dealbook Business#BusinessDay#Dealbook
## Culture#Arts# Culture#Arts#
## Metro#N.Y./Region# Metro#N.Y./Region#
## #Opinion#RoomForDebate #Opinion#RoomForDebate
## Styles##Fashion Styles##Fashion
## Business#BusinessDay#SmallBusiness Business#BusinessDay#SmallBusiness
## Travel#Travel# Travel#Travel#
## Foreign#World#AsiaPacific Foreign#World#AsiaPacific
## #Multimedia# #Multimedia#
## myOther myOther
## TStyle## TStyle##
## Culture## Culture##
## Foreign#World# Foreign#World#
## #U.S.#Education #U.S.#Education
## .n.OOB .n.Fit .n.Tst .freqRatio.Fit
## #Opinion#ThePublicEditor 4 16 10 0.003330558
## OpEd#Opinion# 89 437 164 0.090965862
## Styles#U.S.# 50 127 61 0.026436303
## Science#Health# 48 148 57 0.030807660
## Business#Crosswords/Games# 18 105 42 0.021856786
## Business#Technology# 126 213 114 0.044338052
## ## 371 913 342 0.190049958
## Business#BusinessDay#Dealbook 323 629 304 0.130932556
## Culture#Arts# 185 490 174 0.101998335
## Metro#N.Y./Region# 70 128 67 0.026644463
## #Opinion#RoomForDebate 20 42 20 0.008742714
## Styles##Fashion 15 104 15 0.021648626
## Business#BusinessDay#SmallBusiness 40 100 41 0.020815987
## Travel#Travel# 34 83 35 0.017277269
## Foreign#World#AsiaPacific 53 150 56 0.031223980
## #Multimedia# 49 92 52 0.019150708
## myOther 5 33 5 0.006869276
## TStyle## 101 623 105 0.129683597
## Culture## 1 NA 70 NA
## Foreign#World# 44 128 47 0.026644463
## #U.S.#Education 82 243 89 0.050582848
## .freqRatio.OOB .freqRatio.Tst
## #Opinion#ThePublicEditor 0.0023148148 0.005347594
## OpEd#Opinion# 0.0515046296 0.087700535
## Styles#U.S.# 0.0289351852 0.032620321
## Science#Health# 0.0277777778 0.030481283
## Business#Crosswords/Games# 0.0104166667 0.022459893
## Business#Technology# 0.0729166667 0.060962567
## ## 0.2146990741 0.182887701
## Business#BusinessDay#Dealbook 0.1869212963 0.162566845
## Culture#Arts# 0.1070601852 0.093048128
## Metro#N.Y./Region# 0.0405092593 0.035828877
## #Opinion#RoomForDebate 0.0115740741 0.010695187
## Styles##Fashion 0.0086805556 0.008021390
## Business#BusinessDay#SmallBusiness 0.0231481481 0.021925134
## Travel#Travel# 0.0196759259 0.018716578
## Foreign#World#AsiaPacific 0.0306712963 0.029946524
## #Multimedia# 0.0283564815 0.027807487
## myOther 0.0028935185 0.002673797
## TStyle## 0.0584490741 0.056149733
## Culture## 0.0005787037 0.037433155
## Foreign#World# 0.0254629630 0.025133690
## #U.S.#Education 0.0474537037 0.047593583
## err.abs.fit.sum err.abs.fit.mean .n.fit
## #Opinion#ThePublicEditor 5.942203 0.37138768 16
## OpEd#Opinion# 128.564823 0.29419868 437
## Styles#U.S.# 57.849040 0.45550425 127
## Science#Health# 61.584766 0.41611329 148
## Business#Crosswords/Games# 26.810474 0.25533785 105
## Business#Technology# 42.988874 0.20182570 213
## ## 107.836370 0.11811213 913
## Business#BusinessDay#Dealbook 85.592251 0.13607671 629
## Culture#Arts# 52.576076 0.10729811 490
## Metro#N.Y./Region# 16.417056 0.12825825 128
## #Opinion#RoomForDebate 4.582758 0.10911328 42
## Styles##Fashion 6.920679 0.06654499 104
## Business#BusinessDay#SmallBusiness 10.835795 0.10835795 100
## Travel#Travel# 5.017648 0.06045359 83
## Foreign#World#AsiaPacific 12.127615 0.08085077 150
## #Multimedia# 6.487383 0.07051503 92
## myOther 2.521537 0.07641022 33
## TStyle## 28.543592 0.04581636 623
## Culture## NA NA NA
## Foreign#World# 6.834537 0.05339482 128
## #U.S.#Education 11.012475 0.04531883 243
## err.abs.OOB.sum err.abs.OOB.mean
## #Opinion#ThePublicEditor 2.22415827 0.55603957
## OpEd#Opinion# 49.39451932 0.55499460
## Styles#U.S.# 26.10326970 0.52206539
## Science#Health# 24.34625696 0.50721369
## Business#Crosswords/Games# 8.92955276 0.49608626
## Business#Technology# 28.36190775 0.22509451
## ## 70.79255480 0.19081551
## Business#BusinessDay#Dealbook 61.53856052 0.19052186
## Culture#Arts# 32.62942406 0.17637527
## Metro#N.Y./Region# 11.83673574 0.16909622
## #Opinion#RoomForDebate 2.91796380 0.14589819
## Styles##Fashion 1.90060021 0.12670668
## Business#BusinessDay#SmallBusiness 4.82293575 0.12057339
## Travel#Travel# 2.94347588 0.08657282
## Foreign#World#AsiaPacific 4.30381605 0.08120408
## #Multimedia# 3.84497275 0.07846883
## myOther 0.38540899 0.07708180
## TStyle## 7.32300251 0.07250498
## Culture## 0.06292232 0.06292232
## Foreign#World# 2.41748545 0.05494285
## #U.S.#Education 4.26274429 0.05198469
## .n.OOB .n.Fit .n.Tst .freqRatio.Fit
## 1728.000000 NA 1870.000000 NA
## .freqRatio.OOB .freqRatio.Tst err.abs.fit.sum err.abs.fit.mean
## 1.000000 1.000000 NA NA
## .n.fit err.abs.OOB.sum err.abs.OOB.mean
## NA 351.342268 4.547164
# Export the OOB observations: the id column plus every column whose name
# contains glb_rsp_var. The file name is <glb_out_pfx><glb_sel_mdl_id> with
# each "." replaced by "_", followed by "_OOBobs.csv".
write.csv(
    x = glbObsOOB[, c(glb_id_var,
                      grep(glb_rsp_var, names(glbObsOOB),
                           fixed = TRUE, value = TRUE))],
    file = paste0(gsub(".", "_", paste0(glb_out_pfx, glb_sel_mdl_id),
                       fixed = TRUE),
                  "_OOBobs.csv"),
    row.names = FALSE
)
# Log the "teardown" step in the fit.models_2 chunk table
fit.models_2_chunk_df <- myadd_chunk(NULL, "fit.models_2_bgn",
                                     label.minor = "teardown")
## label step_major step_minor label_minor bgn end elapsed
## 1 fit.models_2_bgn 1 0 teardown 351.381 NA NA
# Add another "fit.models" entry to the global chunk log without starting a
# new major step
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models",
                             major.inc = FALSE)
## label step_major step_minor label_minor bgn end elapsed
## 12 fit.models 6 2 2 336.921 351.391 14.471
## 13 fit.models 6 3 3 351.392 NA NA
# if (sum(is.na(glbObsAll$D.P.http)) > 0)
# stop("fit.models_3: Why is this happening ?")
#stop(here"); glb2Sav()
# Copy columns that exist only in the Fit / OOB subsets back into the global
# glbObsTrn and glbObsAll data frames, which are modified in place via `<<-`.
#
# Reads the globals glbObsFit, glbObsOOB, glbObsNew and glb_rsp_var.
# Columns that glbObsTrn / glbObsAll already have are never overwritten.
# Returns NULL invisibly.
sync_glb_obs_df <- function() {
    # Merge or cbind ?
    # NOTE(review): values are assigned by position, so the rows flagged
    #   "Fit" / "OOB" in .lcn are assumed to be in the same order as the rows
    #   of glbObsFit / glbObsOOB -- confirm against the code that splits them.

    # Decide which columns are new BEFORE changing anything. Computing the
    # OOB sets after the Fit loops have run would drop every column that
    # Fit and OOB have in common, leaving the OOB rows of those columns NA.
    fit_cols_for_trn <- setdiff(names(glbObsFit), names(glbObsTrn))
    fit_cols_for_all <- setdiff(names(glbObsFit), names(glbObsAll))
    oob_cols_for_trn <- setdiff(names(glbObsOOB), names(glbObsTrn))
    oob_cols_for_all <- setdiff(names(glbObsOOB), names(glbObsAll))

    for (col in fit_cols_for_trn) {
        glbObsTrn[glbObsTrn$.lcn == "Fit", col] <<- glbObsFit[, col]
    }
    for (col in fit_cols_for_all) {
        glbObsAll[glbObsAll$.lcn == "Fit", col] <<- glbObsFit[, col]
    }

    # glbObsTrn only receives OOB columns when the new observations carry no
    # response values; glbObsAll receives them unconditionally.
    if (all(is.na(glbObsNew[, glb_rsp_var]))) {
        for (col in oob_cols_for_trn) {
            glbObsTrn[glbObsTrn$.lcn == "OOB", col] <<- glbObsOOB[, col]
        }
    }
    for (col in oob_cols_for_all) {
        glbObsAll[glbObsAll$.lcn == "OOB", col] <<- glbObsOOB[, col]
    }

    invisible(NULL)
}
# Push the Fit / OOB-only columns back into glbObsTrn and glbObsAll ...
sync_glb_obs_df()
# ... then list any glbObsNew columns that glbObsAll still lacks
print(
    setdiff(names(glbObsNew), names(glbObsAll))
)
## character(0)
# Optionally checkpoint the selected-model state to <glb_out_pfx>selmdl_dsk.RData
if (glb_save_envir) {
    save(list = c("glb_feats_df",
                  "glbObsAll", #glbObsTrn, glbObsFit, glbObsOOB, glbObsNew,
                  "glb_models_df", "dsp_models_df", "glb_models_lst",
                  "glb_sel_mdl", "glb_sel_mdl_id",
                  "glb_model_type"),
         file = paste0(glb_out_pfx, "selmdl_dsk.RData"))
}
#load(paste0(glb_out_pfx, "selmdl_dsk.RData"))
# Drop the scratch object if an earlier chunk left one behind; a bare
# rm(ret_lst) warns "object 'ret_lst' not found" when it was never created.
if (exists("ret_lst", inherits = FALSE)) rm(ret_lst)
## Warning in rm(ret_lst): object 'ret_lst' not found
# Mark "model.selected" as available, then replay the analytics Petri net
# with the updated list of available objects.
glb_analytics_avl_objs <- c(glb_analytics_avl_objs, "model.selected")
replay.petrisim(pn = glb_analytics_pn,
                replay.trans = glb_analytics_avl_objs,
                flip_coord = TRUE)
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
## 2.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction data.new.prediction firing: model.selected
## 3.0000 3 0 2 1 0
# Open the next major step, "fit.data.training", in the global chunk log
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.data.training",
                             major.inc = TRUE)
## label step_major step_minor label_minor bgn end
## 13 fit.models 6 3 3 351.392 357.541
## 14 fit.data.training 7 0 0 357.541 NA
## elapsed
## 13 6.149
## 14 NA
7.0: fit data training
#load(paste0(glb_inp_pfx, "dsk.RData"))
#stop(here"); glb2Sav()
# Pick (or fit) the final model and record it in glb_fin_mdl / glb_fin_mdl_id.
#   * A user-supplied glb_fin_mdl_id that already names an entry of
#     glb_models_lst is used as-is.
#   * If glbObsNew has at least one non-NA response, the selected model is
#     re-registered under "Final.<glb_sel_mdl_id>" without refitting.
#   * Otherwise the selected model's algorithm is refit on glbObsTrn (less any
#     ids listed in glbObsTrnOutliers) and that fit becomes the final model.
if (!is.null(glb_fin_mdl_id) && (glb_fin_mdl_id %in% names(glb_models_lst))) {
    warning("Final model same as user selected model")
    glb_fin_mdl <- glb_models_lst[[glb_fin_mdl_id]]
} else if (!all(is.na(glbObsNew[, glb_rsp_var]))) {
    # if (nrow(glbObsFit) + length(glbObsFitOutliers) == nrow(glbObsTrn))
    warning("Final model same as glb_sel_mdl_id")
    glb_fin_mdl_id <- paste0("Final.", glb_sel_mdl_id)
    glb_fin_mdl <- glb_sel_mdl
    glb_models_lst[[glb_fin_mdl_id]] <- glb_fin_mdl
} else {
    # Feature selection on the full training set. This runs unconditionally:
    # the guard that limited it to RFE-based models is commented out below.
    # if (grepl("RFE", glb_sel_mdl_id) ||
    #     (!is.null(glb_mdl_ensemble) && grepl("RFE", glb_mdl_ensemble))) {
    indep_vars <- myadjust_interaction_feats(subset(glb_feats_df,
                                        !nzv & (exclude.as.feat != 1))[, "id"])
    rfe_trn_results <-
        myrun_rfe(glbObsTrn, indep_vars, glbRFESizes[["Final"]])
    # Report predictors that differ between the glbObsTrn-based selection and
    # the earlier rfe_fit_results.
    if (!isTRUE(all.equal(sort(predictors(rfe_trn_results)),
                          sort(predictors(rfe_fit_results))))) {
        print("Diffs predictors(rfe_trn_results) vs. predictors(rfe_fit_results):")
        print(setdiff(predictors(rfe_trn_results), predictors(rfe_fit_results)))
        print("Diffs predictors(rfe_fit_results) vs. predictors(rfe_trn_results):")
        print(setdiff(predictors(rfe_fit_results), predictors(rfe_trn_results)))
    }
    # }

    if (grepl("Ensemble", glb_sel_mdl_id)) {
        # Find which models are relevant
        mdlimp_df <- subset(myget_feats_importance(glb_sel_mdl), imp > 5)
        # Fit selected models on glbObsTrn
        # Component model ids are recovered from the importance row names by
        # stripping the prediction-column prefix and the ".prob" suffix.
        for (mdl_id in gsub(".prob", "",
                            gsub(mygetPredictIds(glb_rsp_var)$value, "",
                                 row.names(mdlimp_df), fixed = TRUE),
                            fixed = TRUE)) {
            mdl_id_components <- unlist(strsplit(mdl_id, "[.]"))
            # Same id with its last component (the method) replaced by "Train".
            mdlIdPfx <- paste0(c(head(mdl_id_components, -1), "Train"),
                               collapse = ".")
            if (grepl("RFE\\.X\\.", mdlIdPfx)) {
                mdlIndepVars <- myadjust_interaction_feats(myextract_actual_feats(
                    predictors(rfe_trn_results)))
            } else {
                mdlIndepVars <- trim(unlist(
                    strsplit(glb_models_df[glb_models_df$id == mdl_id, "feats"], "[,]")))
            }
            ret_lst <-
                myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
                        id.prefix = mdlIdPfx,
                        type = glb_model_type, tune.df = glb_tune_models_df,
                        trainControl.method = "repeatedcv",
                        trainControl.number = glb_rcv_n_folds,
                        trainControl.repeats = glb_rcv_n_repeats,
                        trainControl.classProbs = glb_is_classification,
                        trainControl.summaryFunction = glbMdlMetricSummaryFn,
                        train.metric = glbMdlMetricSummary,
                        train.maximize = glbMdlMetricMaximize,
                        train.method = tail(mdl_id_components, 1))),
                    indep_vars = mdlIndepVars,
                    rsp_var = glb_rsp_var,
                    fit_df = glbObsTrn, OOB_df = NULL)
            # Predict with the model just appended to glb_models_df, using the
            # original component's OOB threshold as the default threshold.
            glbObsTrn <- glb_get_predictions(df = glbObsTrn,
                            mdl_id = tail(glb_models_df$id, 1),
                            rsp_var = glb_rsp_var,
                            prob_threshold_def =
                    subset(glb_models_df, id == mdl_id)$opt.prob.threshold.OOB)
            glbObsNew <- glb_get_predictions(df = glbObsNew,
                            mdl_id = tail(glb_models_df$id, 1),
                            rsp_var = glb_rsp_var,
                            prob_threshold_def =
                    subset(glb_models_df, id == mdl_id)$opt.prob.threshold.OOB)
        }
    }

    # "Final" model
    if ((model_method <- glb_sel_mdl$method) == "custom")
        # get actual method from the mdl_id
        model_method <- tail(unlist(strsplit(glb_sel_mdl_id, "[.]")), 1)

    # Predictors for the final fit.
    if (grepl("Ensemble", glb_sel_mdl_id)) {
        # Find which models are relevant
        mdlimp_df <- subset(myget_feats_importance(glb_sel_mdl), imp > 5)
        # Point each component at its ".Train" refit from the loop above.
        if (glb_is_classification && glb_is_binomial) {
            indep_vars_vctr <- gsub("(.*)\\.(.*)\\.prob", "\\1\\.Train\\.\\2\\.prob",
                                    row.names(mdlimp_df))
        } else {
            indep_vars_vctr <- gsub("(.*)\\.(.*)", "\\1\\.Train\\.\\2",
                                    row.names(mdlimp_df))
        }
    } else if (grepl("RFE.X", glb_sel_mdl_id, fixed = TRUE)) {
        indep_vars_vctr <- myextract_actual_feats(predictors(rfe_trn_results))
    } else {
        indep_vars_vctr <-
            trim(unlist(strsplit(glb_models_df[glb_models_df$id ==
                                               glb_sel_mdl_id
                                               , "feats"], "[,]")))
    }

    # Reuse the pre-processing method named inside the selected model id, if
    # any of glb_preproc_methods appears in it.
    if (!is.null(glb_preproc_methods) &&
        ((match_pos <- regexpr(gsub(".", "\\.",
                                    paste(glb_preproc_methods, collapse = "|"),
                                    fixed = TRUE), glb_sel_mdl_id)) != -1)) {
        ths_preProcess <- str_sub(glb_sel_mdl_id, match_pos,
                                  match_pos + attr(match_pos, "match.length") - 1)
    } else {
        ths_preProcess <- NULL
    }

    # Scalar condition: plain if / else instead of vectorised ifelse().
    mdl_id_pfx <- if (grepl("Ensemble", glb_sel_mdl_id)) {
        "Final.Ensemble"
    } else {
        "Final"
    }

    # Drop any training observations flagged as outliers for this model prefix.
    trnobs_df <- if (is.null(glbObsTrnOutliers[[mdl_id_pfx]])) glbObsTrn else
        glbObsTrn[!(glbObsTrn[, glb_id_var] %in%
                    glbObsTrnOutliers[[mdl_id_pfx]]), ]

    # Force fitting of Final.glm to identify outliers
    #method_vctr <- unique(c("glm", myparseMdlId(glb_sel_mdl_id)$alg))
    # or skip glm for speed
    method_vctr <- myparseMdlId(glb_sel_mdl_id)$alg

    # Tracks whether at least one model was actually fit in the loop, so an
    # unrelated earlier model is never promoted to "final" by mistake.
    fin_mdl_fitted <- FALSE
    for (method in method_vctr) {
        #source("caret_nominalTrainWorkflow.R")

        # glmnet requires at least 2 indep vars
        if ((length(indep_vars_vctr) == 1) && (method %in% "glmnet"))
            next

        ret_lst <-
            myfit_mdl(mdl_specs_lst = myinit_mdl_specs_lst(mdl_specs_lst = list(
                    id.prefix = mdl_id_pfx,
                    type = glb_model_type, trainControl.method = "repeatedcv",
                    trainControl.number = glb_rcv_n_folds,
                    trainControl.repeats = glb_rcv_n_repeats,
                    trainControl.classProbs = glb_is_classification,
                    trainControl.summaryFunction = glbMdlMetricSummaryFn,
                    trainControl.allowParallel = !(method %in% c("glm", "glmnet")),
                    train.metric = glbMdlMetricSummary,
                    train.maximize = glbMdlMetricMaximize,
                    train.method = method,
                    train.preProcess = ths_preProcess)),
                indep_vars = indep_vars_vctr, rsp_var = glb_rsp_var,
                fit_df = trnobs_df, OOB_df = NULL)
        fin_mdl_fitted <- TRUE
    }

    # Without this check the code below would take the last entry of
    # glb_models_lst -- a model fit earlier, for a different purpose -- as the
    # final model whenever every method was skipped or method_vctr was empty.
    if (!fin_mdl_fitted)
        stop("No final model was fit for ", glb_sel_mdl_id,
             ": every candidate method was skipped", call. = FALSE)

    # The most recently added model is the final one.
    if ((length(method_vctr) == 1) || (method != "glm")) {
        glb_fin_mdl <- glb_models_lst[[length(glb_models_lst)]]
        glb_fin_mdl_id <- glb_models_df[length(glb_models_lst), "id"]
    }
}
## +(rfe) fit Fold1.Rep1 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold1.Rep1 size: 60
## +(rfe) imp Fold1.Rep1
## -(rfe) imp Fold1.Rep1
## +(rfe) fit Fold1.Rep1 size: 32
## -(rfe) fit Fold1.Rep1 size: 32
## +(rfe) fit Fold1.Rep1 size: 16
## -(rfe) fit Fold1.Rep1 size: 16
## +(rfe) fit Fold1.Rep1 size: 8
## -(rfe) fit Fold1.Rep1 size: 8
## +(rfe) fit Fold1.Rep1 size: 4
## -(rfe) fit Fold1.Rep1 size: 4
## +(rfe) fit Fold1.Rep1 size: 2
## -(rfe) fit Fold1.Rep1 size: 2
## +(rfe) fit Fold2.Rep1 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold2.Rep1 size: 60
## +(rfe) imp Fold2.Rep1
## -(rfe) imp Fold2.Rep1
## +(rfe) fit Fold2.Rep1 size: 32
## -(rfe) fit Fold2.Rep1 size: 32
## +(rfe) fit Fold2.Rep1 size: 16
## -(rfe) fit Fold2.Rep1 size: 16
## +(rfe) fit Fold2.Rep1 size: 8
## -(rfe) fit Fold2.Rep1 size: 8
## +(rfe) fit Fold2.Rep1 size: 4
## -(rfe) fit Fold2.Rep1 size: 4
## +(rfe) fit Fold2.Rep1 size: 2
## -(rfe) fit Fold2.Rep1 size: 2
## +(rfe) fit Fold3.Rep1 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold3.Rep1 size: 60
## +(rfe) imp Fold3.Rep1
## -(rfe) imp Fold3.Rep1
## +(rfe) fit Fold3.Rep1 size: 32
## -(rfe) fit Fold3.Rep1 size: 32
## +(rfe) fit Fold3.Rep1 size: 16
## -(rfe) fit Fold3.Rep1 size: 16
## +(rfe) fit Fold3.Rep1 size: 8
## -(rfe) fit Fold3.Rep1 size: 8
## +(rfe) fit Fold3.Rep1 size: 4
## -(rfe) fit Fold3.Rep1 size: 4
## +(rfe) fit Fold3.Rep1 size: 2
## -(rfe) fit Fold3.Rep1 size: 2
## +(rfe) fit Fold1.Rep2 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold1.Rep2 size: 60
## +(rfe) imp Fold1.Rep2
## -(rfe) imp Fold1.Rep2
## +(rfe) fit Fold1.Rep2 size: 32
## -(rfe) fit Fold1.Rep2 size: 32
## +(rfe) fit Fold1.Rep2 size: 16
## -(rfe) fit Fold1.Rep2 size: 16
## +(rfe) fit Fold1.Rep2 size: 8
## -(rfe) fit Fold1.Rep2 size: 8
## +(rfe) fit Fold1.Rep2 size: 4
## -(rfe) fit Fold1.Rep2 size: 4
## +(rfe) fit Fold1.Rep2 size: 2
## -(rfe) fit Fold1.Rep2 size: 2
## +(rfe) fit Fold2.Rep2 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold2.Rep2 size: 60
## +(rfe) imp Fold2.Rep2
## -(rfe) imp Fold2.Rep2
## +(rfe) fit Fold2.Rep2 size: 32
## -(rfe) fit Fold2.Rep2 size: 32
## +(rfe) fit Fold2.Rep2 size: 16
## -(rfe) fit Fold2.Rep2 size: 16
## +(rfe) fit Fold2.Rep2 size: 8
## -(rfe) fit Fold2.Rep2 size: 8
## +(rfe) fit Fold2.Rep2 size: 4
## -(rfe) fit Fold2.Rep2 size: 4
## +(rfe) fit Fold2.Rep2 size: 2
## -(rfe) fit Fold2.Rep2 size: 2
## +(rfe) fit Fold3.Rep2 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold3.Rep2 size: 60
## +(rfe) imp Fold3.Rep2
## -(rfe) imp Fold3.Rep2
## +(rfe) fit Fold3.Rep2 size: 32
## -(rfe) fit Fold3.Rep2 size: 32
## +(rfe) fit Fold3.Rep2 size: 16
## -(rfe) fit Fold3.Rep2 size: 16
## +(rfe) fit Fold3.Rep2 size: 8
## -(rfe) fit Fold3.Rep2 size: 8
## +(rfe) fit Fold3.Rep2 size: 4
## -(rfe) fit Fold3.Rep2 size: 4
## +(rfe) fit Fold3.Rep2 size: 2
## -(rfe) fit Fold3.Rep2 size: 2
## +(rfe) fit Fold1.Rep3 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold1.Rep3 size: 60
## +(rfe) imp Fold1.Rep3
## -(rfe) imp Fold1.Rep3
## +(rfe) fit Fold1.Rep3 size: 32
## -(rfe) fit Fold1.Rep3 size: 32
## +(rfe) fit Fold1.Rep3 size: 16
## -(rfe) fit Fold1.Rep3 size: 16
## +(rfe) fit Fold1.Rep3 size: 8
## -(rfe) fit Fold1.Rep3 size: 8
## +(rfe) fit Fold1.Rep3 size: 4
## -(rfe) fit Fold1.Rep3 size: 4
## +(rfe) fit Fold1.Rep3 size: 2
## -(rfe) fit Fold1.Rep3 size: 2
## +(rfe) fit Fold2.Rep3 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold2.Rep3 size: 60
## +(rfe) imp Fold2.Rep3
## -(rfe) imp Fold2.Rep3
## +(rfe) fit Fold2.Rep3 size: 32
## -(rfe) fit Fold2.Rep3 size: 32
## +(rfe) fit Fold2.Rep3 size: 16
## -(rfe) fit Fold2.Rep3 size: 16
## +(rfe) fit Fold2.Rep3 size: 8
## -(rfe) fit Fold2.Rep3 size: 8
## +(rfe) fit Fold2.Rep3 size: 4
## -(rfe) fit Fold2.Rep3 size: 4
## +(rfe) fit Fold2.Rep3 size: 2
## -(rfe) fit Fold2.Rep3 size: 2
## +(rfe) fit Fold3.Rep3 size: 60
## Warning in lda.default(x, grouping, ...): variables are collinear
## -(rfe) fit Fold3.Rep3 size: 60
## +(rfe) imp Fold3.Rep3
## -(rfe) imp Fold3.Rep3
## +(rfe) fit Fold3.Rep3 size: 32
## -(rfe) fit Fold3.Rep3 size: 32
## +(rfe) fit Fold3.Rep3 size: 16
## -(rfe) fit Fold3.Rep3 size: 16
## +(rfe) fit Fold3.Rep3 size: 8
## -(rfe) fit Fold3.Rep3 size: 8
## +(rfe) fit Fold3.Rep3 size: 4
## -(rfe) fit Fold3.Rep3 size: 4
## +(rfe) fit Fold3.Rep3 size: 2
## -(rfe) fit Fold3.Rep3 size: 2
## Warning in lda.default(x, grouping, ...): variables are collinear
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (3 fold, repeated 3 times)
##
## Resampling performance over subset size:
##
## Variables Accuracy Kappa AccuracySD KappaSD Selected
## 2 0.8204 0.03718 0.003445 0.01035
## 4 0.8760 0.44502 0.003122 0.01919
## 8 0.8738 0.44420 0.003283 0.02061
## 16 0.9016 0.63787 0.006552 0.02352
## 32 0.9013 0.63732 0.006547 0.02350
## 60 0.9029 0.64607 0.006800 0.02399 *
##
## The top 5 variables (out of 60):
## WordCount.log1p, WordCount.root2, WordCount.nexp, NDSSName.my.fctrOpEd#Opinion#, PubDate.day.minutes.poly.1
##
## [1] "WordCount.log1p"
## [2] "WordCount.root2"
## [3] "WordCount.nexp"
## [4] "NDSSName.my.fctrOpEd#Opinion#"
## [5] "PubDate.day.minutes.poly.1"
## [6] "PubDate.day.minutes.poly.4"
## [7] "PubDate.hour.fctr(15.3,23]"
## [8] "PubDate.last4.log1p"
## [9] "PubDate.last2.log1p"
## [10] "NDSSName.my.fctrScience#Health#"
## [11] "NDSSName.my.fctrBusiness#Crosswords/Games#"
## [12] "PubDate.day.minutes.poly.5"
## [13] "PubDate.last8.log1p"
## [14] "NDSSName.my.fctrStyles#U.S.#"
## [15] "PubDate.wkend"
## [16] "PubDate.last16.log1p"
## [17] "NDSSName.my.fctr##:PubDate.day.minutes.poly.2.ctg"
## [18] "PubDate.day.minutes.poly.2"
## [19] "PubDate.juliandate"
## [20] "PubDate.wkday.fctr6"
## [21] "PubDate.month.fctr11"
## [22] "PubDate.second.fctr(14.8,29.5]"
## [23] "PubDate.date.fctr(7,13]"
## [24] ".rnorm"
## [25] "NDSSName.my.fctr##:PubDate.day.minutes.poly.5.ctg"
## [26] "PubDate.wkday.fctr1"
## [27] "PubDate.day.minutes.poly.3"
## [28] "NDSSName.my.fctr##:PubDate.day.minutes.poly.3.ctg"
## [29] "PubDate.date.fctr(25,31]"
## [30] "PubDate.last32.log1p"
## [31] "PubDate.hour.fctr(7.67,15.3]"
## [32] "NDSSName.my.fctr##:PubDate.day.minutes.poly.1.ctg"
## [33] "PubDate.minute.fctr(14.8,29.5]"
## [34] "NDSSName.my.fctr##:PubDate.day.minutes.poly.4.ctg"
## [35] "PubDate.month.fctr10"
## [36] "NDSSName.my.fctrBusiness#Technology#"
## [37] "NDSSName.my.fctrmyOther"
## [38] "PubDate.wkday.fctr3"
## [39] "PubDate.date.fctr(13,19]"
## [40] "PubDate.second.fctr(29.5,44.2]"
## [41] "PubDate.minute.fctr(44.2,59.1]"
## [42] "PubDate.wkday.fctr4"
## [43] "PubDate.second.fctr(44.2,59.1]"
## [44] "NDSSName.my.fctr#Opinion#RoomForDebate"
## [45] "PubDate.date.fctr(19,25]"
## [46] "NDSSName.my.fctrMetro#N.Y./Region#"
## [47] "NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness"
## [48] "NDSSName.my.fctrTravel#Travel#"
## [49] "NDSSName.my.fctrStyles##Fashion"
## [50] "NDSSName.my.fctr#Multimedia#"
## [51] "PubDate.wkday.fctr2"
## [52] "NDSSName.my.fctrForeign#World#"
## [53] "NDSSName.my.fctrForeign#World#AsiaPacific"
## [54] "PubDate.wkday.fctr5"
## [55] "PubDate.minute.fctr(29.5,44.2]"
## [56] "NDSSName.my.fctr#U.S.#Education"
## [57] "NDSSName.my.fctrCulture#Arts#"
## [58] "NDSSName.my.fctrBusiness#BusinessDay#Dealbook"
## [59] "NDSSName.my.fctr##"
## [60] "NDSSName.my.fctrTStyle##"
## [1] "fitting model: Final##rcv#glmnet"
## [1] " indep_vars: WordCount.root2,WordCount.log1p,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.hour.fctr,PubDate.wkend,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.2,PubDate.last4.log1p,PubDate.last2.log1p,PubDate.last8.log1p,PubDate.last16.log1p,PubDate.day.minutes.poly.3,PubDate.month.fctr,PubDate.juliandate,.rnorm,PubDate.last32.log1p,PubDate.date.fctr,PubDate.second.fctr,PubDate.minute.fctr,PubDate.wkday.fctr,WordCount.nexp,PubDate.day.minutes.poly.5,NDSSName.my.fctr:PubDate.last32.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.3.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.4.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.5.ctg,NDSSName.my.fctr:PubDate.last16.log1p.ctg,NDSSName.my.fctr:PubDate.last2.log1p.ctg,NDSSName.my.fctr:PubDate.last4.log1p.ctg,NDSSName.my.fctr:PubDate.last8.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.2.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.1.ctg"
## + Fold1.Rep1: alpha=0.100, lambda=0.07781
## - Fold1.Rep1: alpha=0.100, lambda=0.07781
## + Fold1.Rep1: alpha=0.325, lambda=0.07781
## - Fold1.Rep1: alpha=0.325, lambda=0.07781
## + Fold1.Rep1: alpha=0.550, lambda=0.07781
## - Fold1.Rep1: alpha=0.550, lambda=0.07781
## + Fold1.Rep1: alpha=0.775, lambda=0.07781
## - Fold1.Rep1: alpha=0.775, lambda=0.07781
## + Fold1.Rep1: alpha=1.000, lambda=0.07781
## - Fold1.Rep1: alpha=1.000, lambda=0.07781
## + Fold2.Rep1: alpha=0.100, lambda=0.07781
## - Fold2.Rep1: alpha=0.100, lambda=0.07781
## + Fold2.Rep1: alpha=0.325, lambda=0.07781
## - Fold2.Rep1: alpha=0.325, lambda=0.07781
## + Fold2.Rep1: alpha=0.550, lambda=0.07781
## - Fold2.Rep1: alpha=0.550, lambda=0.07781
## + Fold2.Rep1: alpha=0.775, lambda=0.07781
## - Fold2.Rep1: alpha=0.775, lambda=0.07781
## + Fold2.Rep1: alpha=1.000, lambda=0.07781
## - Fold2.Rep1: alpha=1.000, lambda=0.07781
## + Fold3.Rep1: alpha=0.100, lambda=0.07781
## - Fold3.Rep1: alpha=0.100, lambda=0.07781
## + Fold3.Rep1: alpha=0.325, lambda=0.07781
## - Fold3.Rep1: alpha=0.325, lambda=0.07781
## + Fold3.Rep1: alpha=0.550, lambda=0.07781
## - Fold3.Rep1: alpha=0.550, lambda=0.07781
## + Fold3.Rep1: alpha=0.775, lambda=0.07781
## - Fold3.Rep1: alpha=0.775, lambda=0.07781
## + Fold3.Rep1: alpha=1.000, lambda=0.07781
## - Fold3.Rep1: alpha=1.000, lambda=0.07781
## + Fold1.Rep2: alpha=0.100, lambda=0.07781
## - Fold1.Rep2: alpha=0.100, lambda=0.07781
## + Fold1.Rep2: alpha=0.325, lambda=0.07781
## - Fold1.Rep2: alpha=0.325, lambda=0.07781
## + Fold1.Rep2: alpha=0.550, lambda=0.07781
## - Fold1.Rep2: alpha=0.550, lambda=0.07781
## + Fold1.Rep2: alpha=0.775, lambda=0.07781
## - Fold1.Rep2: alpha=0.775, lambda=0.07781
## + Fold1.Rep2: alpha=1.000, lambda=0.07781
## - Fold1.Rep2: alpha=1.000, lambda=0.07781
## + Fold2.Rep2: alpha=0.100, lambda=0.07781
## - Fold2.Rep2: alpha=0.100, lambda=0.07781
## + Fold2.Rep2: alpha=0.325, lambda=0.07781
## - Fold2.Rep2: alpha=0.325, lambda=0.07781
## + Fold2.Rep2: alpha=0.550, lambda=0.07781
## - Fold2.Rep2: alpha=0.550, lambda=0.07781
## + Fold2.Rep2: alpha=0.775, lambda=0.07781
## - Fold2.Rep2: alpha=0.775, lambda=0.07781
## + Fold2.Rep2: alpha=1.000, lambda=0.07781
## - Fold2.Rep2: alpha=1.000, lambda=0.07781
## + Fold3.Rep2: alpha=0.100, lambda=0.07781
## - Fold3.Rep2: alpha=0.100, lambda=0.07781
## + Fold3.Rep2: alpha=0.325, lambda=0.07781
## - Fold3.Rep2: alpha=0.325, lambda=0.07781
## + Fold3.Rep2: alpha=0.550, lambda=0.07781
## - Fold3.Rep2: alpha=0.550, lambda=0.07781
## + Fold3.Rep2: alpha=0.775, lambda=0.07781
## - Fold3.Rep2: alpha=0.775, lambda=0.07781
## + Fold3.Rep2: alpha=1.000, lambda=0.07781
## - Fold3.Rep2: alpha=1.000, lambda=0.07781
## + Fold1.Rep3: alpha=0.100, lambda=0.07781
## - Fold1.Rep3: alpha=0.100, lambda=0.07781
## + Fold1.Rep3: alpha=0.325, lambda=0.07781
## - Fold1.Rep3: alpha=0.325, lambda=0.07781
## + Fold1.Rep3: alpha=0.550, lambda=0.07781
## - Fold1.Rep3: alpha=0.550, lambda=0.07781
## + Fold1.Rep3: alpha=0.775, lambda=0.07781
## - Fold1.Rep3: alpha=0.775, lambda=0.07781
## + Fold1.Rep3: alpha=1.000, lambda=0.07781
## - Fold1.Rep3: alpha=1.000, lambda=0.07781
## + Fold2.Rep3: alpha=0.100, lambda=0.07781
## - Fold2.Rep3: alpha=0.100, lambda=0.07781
## + Fold2.Rep3: alpha=0.325, lambda=0.07781
## - Fold2.Rep3: alpha=0.325, lambda=0.07781
## + Fold2.Rep3: alpha=0.550, lambda=0.07781
## - Fold2.Rep3: alpha=0.550, lambda=0.07781
## + Fold2.Rep3: alpha=0.775, lambda=0.07781
## - Fold2.Rep3: alpha=0.775, lambda=0.07781
## + Fold2.Rep3: alpha=1.000, lambda=0.07781
## - Fold2.Rep3: alpha=1.000, lambda=0.07781
## + Fold3.Rep3: alpha=0.100, lambda=0.07781
## - Fold3.Rep3: alpha=0.100, lambda=0.07781
## + Fold3.Rep3: alpha=0.325, lambda=0.07781
## - Fold3.Rep3: alpha=0.325, lambda=0.07781
## + Fold3.Rep3: alpha=0.550, lambda=0.07781
## - Fold3.Rep3: alpha=0.550, lambda=0.07781
## + Fold3.Rep3: alpha=0.775, lambda=0.07781
## - Fold3.Rep3: alpha=0.775, lambda=0.07781
## + Fold3.Rep3: alpha=1.000, lambda=0.07781
## - Fold3.Rep3: alpha=1.000, lambda=0.07781
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.325, lambda = 0.0168 on full training set
## Length Class Mode
## a0 100 -none- numeric
## beta 26700 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 267 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -5.542323e+00
## NDSSName.my.fctr#Multimedia#
## -6.698617e-02
## NDSSName.my.fctr#Opinion#RoomForDebate
## -5.623451e-01
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 1.793776e+00
## NDSSName.my.fctr#U.S.#Education
## -2.145070e-01
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 1.016826e+00
## NDSSName.my.fctrForeign#World#
## -9.522547e-02
## NDSSName.my.fctrForeign#World#AsiaPacific
## -1.795441e-01
## NDSSName.my.fctrOpEd#Opinion#
## 1.283466e+00
## NDSSName.my.fctrScience#Health#
## 1.597700e+00
## NDSSName.my.fctrStyles##Fashion
## -3.096995e-01
## NDSSName.my.fctrStyles#U.S.#
## 9.914393e-01
## NDSSName.my.fctrTStyle##
## -1.625248e-01
## NDSSName.my.fctrmyOther
## -7.727191e-02
## PubDate.day.minutes.poly.1
## 1.137380e+01
## PubDate.day.minutes.poly.2
## 9.802958e+00
## PubDate.day.minutes.poly.3
## 2.057539e+00
## PubDate.day.minutes.poly.4
## 5.375695e+00
## PubDate.last16.log1p
## 3.517182e-02
## PubDate.minute.fctr(29.5,44.2]
## -3.520280e-02
## PubDate.wkday.fctr5
## -5.840836e-02
## PubDate.wkend
## 2.889822e-01
## WordCount.log1p
## 2.705197e-01
## WordCount.root2
## 4.632042e-02
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg
## 1.074824e+00
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg
## -1.811794e+01
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg
## 1.681690e+00
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg
## 4.073885e+00
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.4.ctg
## -1.488289e-01
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg
## 1.930600e+00
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg
## 4.647261e+00
## NDSSName.my.fctr##:PubDate.last16.log1p.ctg
## 3.263717e-03
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg
## -1.285205e-02
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg
## 3.017551e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg
## -8.077726e-03
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg
## 5.207263e-02
## NDSSName.my.fctrForeign#World#:PubDate.last16.log1p.ctg
## -2.035752e-05
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg
## -7.887719e-03
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg
## 1.819873e-02
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg
## -2.814127e-04
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg
## -8.078682e-03
## NDSSName.my.fctr#Multimedia#:PubDate.last2.log1p.ctg
## -3.826349e-03
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg
## -3.604175e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg
## -1.794953e-02
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last2.log1p.ctg
## -1.346406e-02
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg
## 4.600379e-02
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg
## 1.563790e-02
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg
## -1.770865e-02
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg
## 7.640129e-02
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg
## 5.210975e-02
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg
## 7.217487e-02
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg
## -1.893065e-02
## NDSSName.my.fctr##:PubDate.last32.log1p.ctg
## 1.051640e-02
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg
## -1.063283e-02
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg
## 5.601627e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg
## -3.846706e-03
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg
## 4.191745e-02
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg
## -2.353556e-04
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg
## 2.968029e-02
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg
## 9.845928e-03
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg
## -1.407795e-03
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg
## -2.521560e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg
## -1.749114e-02
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last4.log1p.ctg
## -5.429529e-03
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg
## 5.492200e-02
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg
## 1.884005e-02
## NDSSName.my.fctrForeign#World#:PubDate.last4.log1p.ctg
## -3.460095e-03
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg
## -1.829494e-02
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg
## 5.441072e-02
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg
## 2.057725e-02
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg
## -1.852684e-03
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg
## 7.399400e-02
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg
## -1.380925e-02
## NDSSName.my.fctrmyOther:PubDate.last4.log1p.ctg
## -1.023127e-03
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg
## -2.476523e-02
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg
## 8.362221e-03
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg
## -1.019486e-02
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg
## 1.159691e-02
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg
## 1.131037e-02
## NDSSName.my.fctrForeign#World#:PubDate.last8.log1p.ctg
## -4.675237e-03
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg
## -1.274316e-02
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg
## 4.354344e-02
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg
## 2.431399e-02
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg
## -3.676436e-03
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg
## 1.689771e-03
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg
## -1.238645e-02
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -5.637527e+00
## NDSSName.my.fctr#Multimedia#
## -9.721687e-02
## NDSSName.my.fctr#Opinion#RoomForDebate
## -6.026005e-01
## NDSSName.my.fctr#Opinion#ThePublicEditor
## 1.868656e+00
## NDSSName.my.fctr#U.S.#Education
## -2.325402e-01
## NDSSName.my.fctrBusiness#Crosswords/Games#
## 1.050116e+00
## NDSSName.my.fctrForeign#World#
## -1.146068e-01
## NDSSName.my.fctrForeign#World#AsiaPacific
## -1.914356e-01
## NDSSName.my.fctrOpEd#Opinion#
## 1.334366e+00
## NDSSName.my.fctrScience#Health#
## 1.653019e+00
## NDSSName.my.fctrStyles##Fashion
## -3.435431e-01
## NDSSName.my.fctrStyles#U.S.#
## 1.016286e+00
## NDSSName.my.fctrTStyle##
## -1.716216e-01
## NDSSName.my.fctrmyOther
## -1.068363e-01
## PubDate.day.minutes.poly.1
## 1.169368e+01
## PubDate.day.minutes.poly.2
## 1.016678e+01
## PubDate.day.minutes.poly.3
## 2.268003e+00
## PubDate.day.minutes.poly.4
## 5.584821e+00
## PubDate.juliandate
## -2.370074e-04
## PubDate.last16.log1p
## 4.295088e-02
## PubDate.last2.log1p
## 6.201620e-04
## PubDate.minute.fctr(29.5,44.2]
## -4.908678e-02
## PubDate.wkday.fctr1
## 9.187065e-03
## PubDate.wkday.fctr5
## -6.866122e-02
## PubDate.wkend
## 2.948203e-01
## WordCount.log1p
## 2.792326e-01
## WordCount.root2
## 4.715917e-02
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg
## 1.260867e+00
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg
## -1.990814e+01
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.2.ctg
## -1.213296e+00
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg
## 1.835231e+00
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg
## 4.401857e+00
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.3.ctg
## 9.886105e-02
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.4.ctg
## -6.376423e-01
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg
## 2.227623e+00
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg
## 5.801401e+00
## NDSSName.my.fctr##:PubDate.last16.log1p.ctg
## 4.244655e-03
## NDSSName.my.fctr#Multimedia#:PubDate.last16.log1p.ctg
## -1.091960e-05
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg
## -1.349052e-02
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg
## 2.954819e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg
## -8.995113e-03
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg
## 5.293481e-02
## NDSSName.my.fctrForeign#World#:PubDate.last16.log1p.ctg
## -1.232911e-03
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg
## -8.685562e-03
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg
## 1.619852e-02
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg
## -1.412020e-03
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg
## -8.464887e-03
## NDSSName.my.fctrmyOther:PubDate.last16.log1p.ctg
## -1.407282e-04
## NDSSName.my.fctr#Multimedia#:PubDate.last2.log1p.ctg
## -6.692419e-03
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg
## -3.837092e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg
## -1.955029e-02
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last2.log1p.ctg
## -1.599387e-02
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg
## 4.544679e-02
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg
## 1.648170e-02
## NDSSName.my.fctrForeign#World#:PubDate.last2.log1p.ctg
## -5.302294e-04
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg
## -1.917401e-02
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg
## 7.786526e-02
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg
## 5.243085e-02
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg
## 7.313559e-02
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg
## -2.014476e-02
## NDSSName.my.fctrmyOther:PubDate.last2.log1p.ctg
## -9.519253e-04
## NDSSName.my.fctr##:PubDate.last32.log1p.ctg
## 1.186537e-02
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg
## -1.143058e-02
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg
## 5.745902e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg
## -4.745142e-03
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg
## 4.178039e-02
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg
## -6.801555e-04
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg
## 2.961820e-02
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg
## 8.996525e-03
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg
## -1.420593e-03
## NDSSName.my.fctr#Multimedia#:PubDate.last4.log1p.ctg
## -2.991567e-04
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg
## -2.638927e-02
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg
## -1.900787e-02
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last4.log1p.ctg
## -7.096405e-03
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg
## 5.539793e-02
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg
## 2.008712e-02
## NDSSName.my.fctrForeign#World#:PubDate.last4.log1p.ctg
## -4.797254e-03
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg
## -1.972468e-02
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg
## 5.425500e-02
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg
## 1.960550e-02
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg
## -2.851359e-03
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg
## 7.576853e-02
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg
## -1.459145e-02
## NDSSName.my.fctrTravel#Travel#:PubDate.last4.log1p.ctg
## -1.572117e-03
## NDSSName.my.fctrmyOther:PubDate.last4.log1p.ctg
## -5.822343e-03
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg
## -2.620793e-02
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg
## 6.967960e-03
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg
## -1.121550e-02
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg
## 9.748270e-03
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg
## 1.194560e-02
## NDSSName.my.fctrForeign#World#:PubDate.last8.log1p.ctg
## -6.191955e-03
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg
## -1.375524e-02
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg
## 4.341873e-02
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg
## 2.373298e-02
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg
## -4.710284e-03
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg
## -1.303938e-02
## NDSSName.my.fctrmyOther:PubDate.last8.log1p.ctg
## -9.197451e-04
## Prediction
## Reference N Y
## N 5147 292
## Y 327 766
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.052358e-01 6.555235e-01 8.978744e-01 9.122330e-01 8.326699e-01
## AccuracyPValue McnemarPValue
## 6.588950e-64 1.717584e-01
## id
## 1 Final##rcv#glmnet
## feats
## 1 WordCount.root2,WordCount.log1p,NDSSName.my.fctr,PubDate.day.minutes.poly.1,PubDate.hour.fctr,PubDate.wkend,PubDate.day.minutes.poly.4,PubDate.day.minutes.poly.2,PubDate.last4.log1p,PubDate.last2.log1p,PubDate.last8.log1p,PubDate.last16.log1p,PubDate.day.minutes.poly.3,PubDate.month.fctr,PubDate.juliandate,.rnorm,PubDate.last32.log1p,PubDate.date.fctr,PubDate.second.fctr,PubDate.minute.fctr,PubDate.wkday.fctr,WordCount.nexp,PubDate.day.minutes.poly.5,NDSSName.my.fctr:PubDate.last32.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.3.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.4.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.5.ctg,NDSSName.my.fctr:PubDate.last16.log1p.ctg,NDSSName.my.fctr:PubDate.last2.log1p.ctg,NDSSName.my.fctr:PubDate.last4.log1p.ctg,NDSSName.my.fctr:PubDate.last8.log1p.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.2.ctg,NDSSName.my.fctr:PubDate.day.minutes.poly.1.ctg
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 25 415.082 13.074
## max.AUCpROC.fit max.Sens.fit max.Spec.fit max.AUCROCR.fit
## 1 0.7990913 0.9641478 0.6340348 0.9349497
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.3 0.7122269 0.9052358
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.8978744 0.912233 0.6311556
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.004379568 0.02133064
# ret_lst is only created when a model was refit above; when the final model
# was reused instead, an unguarded rm() raises "object 'ret_lst' not found".
if (exists("ret_lst", inherits = FALSE)) {
    rm(ret_lst)
}
# Close the current chunk-log entry and open the next minor step of
# "fit.data.training".
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.data.training", major.inc = FALSE)
## label step_major step_minor label_minor bgn end
## 14 fit.data.training 7 0 0 357.541 804.099
## 15 fit.data.training 7 1 1 804.100 NA
## elapsed
## 14 446.559
## 15 NA
#stop("here"); glb2Sav()
# Default probability threshold passed to glb_get_predictions() below.
# For a binomial classification problem it is read from the
# "opt.prob.threshold.OOB" column of the glb_models_df row(s) whose id equals
# glb_sel_mdl_id; in every other case it is NULL.
prob_threshold <- if (glb_is_classification && glb_is_binomial) {
  glb_models_df[glb_models_df$id == glb_sel_mdl_id, "opt.prob.threshold.OOB"]
} else {
  NULL
}
# When the final model id contains "Ensemble", first obtain predictions for
# every component model on both glbObsTrn and glbObsNew; outliers that have
# been moved to OOB might not have been predicted yet.
if (grepl("Ensemble", glb_fin_mdl_id)) {
  # The final model's `feats` entry is split on commas, one element per
  # component.
  mdlEnsembleComps <- subset(glb_models_df, id == glb_fin_mdl_id)$feats
  mdlEnsembleComps <- unlist(str_split(mdlEnsembleComps, ","))

  # Binomial classification: drop a trailing ".prob" from each component name.
  if (glb_is_classification && glb_is_binomial) {
    mdlEnsembleComps <- gsub("\\.prob$", "", mdlEnsembleComps)
  }

  # Strip the leading prefix given by mygetPredictIds(glb_rsp_var)$value; its
  # dots are escaped so that they match literally in the regex.
  # NOTE(review): what remains is presumably the bare model id -- confirm
  # against mygetPredictIds().
  mdlEnsembleComps <- gsub(
    paste0(
      "^",
      gsub(".", "\\.", mygetPredictIds(glb_rsp_var)$value, fixed = TRUE)
    ),
    "",
    mdlEnsembleComps
  )

  for (mdl_id in mdlEnsembleComps) {
    glbObsTrn <- glb_get_predictions(
      df = glbObsTrn,
      mdl_id = mdl_id,
      rsp_var = glb_rsp_var,
      prob_threshold_def = prob_threshold
    )
    glbObsNew <- glb_get_predictions(
      df = glbObsNew,
      mdl_id = mdl_id,
      rsp_var = glb_rsp_var,
      prob_threshold_def = prob_threshold
    )
  }
}

# Predictions of the final model itself on the training observations.
glbObsTrn <- glb_get_predictions(
  df = glbObsTrn,
  mdl_id = glb_fin_mdl_id,
  rsp_var = glb_rsp_var,
  prob_threshold_def = prob_threshold
)
## Warning in glb_get_predictions(df = glbObsTrn, mdl_id = glb_fin_mdl_id, :
## Using default probability threshold: 0.1
# Update the feature-importance table using the final model, copy its `imp`
# column into a column named "<glb_fin_mdl_id>.imp", and print the table.
glb_featsimp_df <- myget_feats_importance(
  mdl = glb_fin_mdl,
  featsimp_df = glb_featsimp_df
)
glb_featsimp_df[, paste0(glb_fin_mdl_id, ".imp")] <- glb_featsimp_df$imp
print(glb_featsimp_df)
## All.X##rcv#glmnet.imp
## PubDate.day.minutes.poly.1 100.00000
## PubDate.day.minutes.poly.2 70.97396
## PubDate.day.minutes.poly.4 55.19343
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg 66.84933
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg 88.15594
## PubDate.day.minutes.poly.3 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg 47.89992
## NDSSName.my.fctr#Opinion#ThePublicEditor 50.37025
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg 57.58351
## NDSSName.my.fctrScience#Health# 51.70030
## NDSSName.my.fctrOpEd#Opinion# 51.25612
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg 55.69412
## NDSSName.my.fctrBusiness#Crosswords/Games# 50.15826
## NDSSName.my.fctrStyles#U.S.# 50.13310
## PubDate.wkend 45.38153
## WordCount.log1p 45.64733
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg 45.00131
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg 44.88522
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg 44.94176
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg 44.79591
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg 44.94701
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg 44.93139
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg 44.81961
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg 44.88194
## WordCount.root2 44.63210
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg 44.98345
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg 44.87622
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg 44.76367
## PubDate.last16.log1p 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg 44.79069
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg 44.76087
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg 44.74633
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg 44.76239
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg 44.46899
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg 44.81554
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg 44.50280
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg 44.45238
## NDSSName.my.fctr##:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg 44.82052
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg 44.67727
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg 44.76793
## PubDate.wkday.fctr1 44.42049
## NDSSName.my.fctr##:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg 44.72830
## PubDate.last2.log1p 44.47824
## .rnorm 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.5.ctg 53.46899
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg 44.69504
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg 44.71210
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology# 44.64238
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrCulture## 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts# 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region# 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg 50.91035
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg 44.68497
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg 44.39875
## NDSSName.my.fctrStyles##Fashion:PubDate.last32.log1p.ctg 44.41767
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg 44.61881
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg 44.59532
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrTravel#Travel# 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last32.log1p.ctg 44.42049
## PubDate.date.fctr(13,19] 44.42049
## PubDate.date.fctr(19,25] 44.42049
## PubDate.date.fctr(25,31] 44.42049
## PubDate.date.fctr(7,13] 44.42049
## PubDate.day.minutes.poly.5 44.42049
## PubDate.hour.fctr(15.3,23] 44.58457
## PubDate.hour.fctr(7.67,15.3] 44.42049
## PubDate.last32.log1p 44.42049
## PubDate.last4.log1p 44.45424
## PubDate.last8.log1p 44.42049
## PubDate.minute.fctr(14.8,29.5] 44.42049
## PubDate.minute.fctr(44.2,59.1] 44.42049
## PubDate.month.fctr10 44.42049
## PubDate.month.fctr11 44.42049
## PubDate.month.fctr12 44.42049
## PubDate.second.fctr(14.8,29.5] 44.42049
## PubDate.second.fctr(29.5,44.2] 44.42049
## PubDate.second.fctr(44.2,59.1] 44.42049
## PubDate.wkday.fctr2 44.42049
## PubDate.wkday.fctr3 44.42049
## PubDate.wkday.fctr4 44.42049
## PubDate.wkday.fctr6 44.42049
## WordCount.nexp 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last16.log1p.ctg 44.42049
## PubDate.juliandate 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg 44.41247
## NDSSName.my.fctrmyOther:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg 44.40744
## NDSSName.my.fctrTravel#Travel#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg 44.37301
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg 44.40130
## NDSSName.my.fctrmyOther:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg 44.40131
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg 44.40155
## NDSSName.my.fctrForeign#World#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg 44.36295
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg 44.39148
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg 44.39068
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg 44.38427
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg 44.36011
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg 44.35398
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg 44.35927
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg 44.38266
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg 44.34877
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg 44.37617
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg 44.38058
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg 44.37363
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg 44.38183
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg 44.33746
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg 44.34392
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg 44.33922
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg 44.33234
## PubDate.minute.fctr(29.5,44.2] 44.42049
## PubDate.wkday.fctr5 44.39981
## NDSSName.my.fctr#Multimedia# 44.42049
## NDSSName.my.fctrmyOther 44.42049
## NDSSName.my.fctrForeign#World# 44.42049
## NDSSName.my.fctrTStyle## 43.56802
## NDSSName.my.fctrForeign#World#AsiaPacific 44.00596
## NDSSName.my.fctr#U.S.#Education 43.89767
## NDSSName.my.fctrStyles##Fashion 44.02222
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate 43.41965
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg 0.00000
## imp
## PubDate.day.minutes.poly.1 100.00000
## PubDate.day.minutes.poly.2 94.99861
## PubDate.day.minutes.poly.4 80.32362
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg 79.92646
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg 76.35014
## PubDate.day.minutes.poly.3 69.56878
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg 69.33717
## NDSSName.my.fctr#Opinion#ThePublicEditor 68.43180
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg 68.23193
## NDSSName.my.fctrScience#Health# 67.75545
## NDSSName.my.fctrOpEd#Opinion# 66.72750
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg 66.33201
## NDSSName.my.fctrBusiness#Crosswords/Games# 65.82644
## NDSSName.my.fctrStyles#U.S.# 65.72658
## PubDate.wkend 63.40967
## WordCount.log1p 63.35579
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg 62.71138
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg 62.70422
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg 62.69663
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.3.ctg 62.66614
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg 62.64524
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg 62.63968
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg 62.63671
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg 62.63125
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg 62.63024
## WordCount.root2 62.61255
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg 62.60862
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg 62.60155
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg 62.59625
## PubDate.last16.log1p 62.59083
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg 62.55716
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg 62.55673
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg 62.53825
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg 62.52533
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg 62.52431
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg 62.51548
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg 62.51309
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg 62.49862
## NDSSName.my.fctr##:PubDate.last32.log1p.ctg 62.49753
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg 62.49439
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg 62.49079
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg 62.48485
## PubDate.wkday.fctr1 62.47973
## NDSSName.my.fctr##:PubDate.last16.log1p.ctg 62.47326
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg 62.46260
## PubDate.last2.log1p 62.46192
## .rnorm 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology# 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture## 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts# 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region# 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrTravel#Travel# 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.last32.log1p.ctg 62.46064
## PubDate.date.fctr(13,19] 62.46064
## PubDate.date.fctr(19,25] 62.46064
## PubDate.date.fctr(25,31] 62.46064
## PubDate.date.fctr(7,13] 62.46064
## PubDate.day.minutes.poly.5 62.46064
## PubDate.hour.fctr(15.3,23] 62.46064
## PubDate.hour.fctr(7.67,15.3] 62.46064
## PubDate.last32.log1p 62.46064
## PubDate.last4.log1p 62.46064
## PubDate.last8.log1p 62.46064
## PubDate.minute.fctr(14.8,29.5] 62.46064
## PubDate.minute.fctr(44.2,59.1] 62.46064
## PubDate.month.fctr10 62.46064
## PubDate.month.fctr11 62.46064
## PubDate.month.fctr12 62.46064
## PubDate.second.fctr(14.8,29.5] 62.46064
## PubDate.second.fctr(29.5,44.2] 62.46064
## PubDate.second.fctr(44.2,59.1] 62.46064
## PubDate.wkday.fctr2 62.46064
## PubDate.wkday.fctr3 62.46064
## PubDate.wkday.fctr4 62.46064
## PubDate.wkday.fctr6 62.46064
## WordCount.nexp 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last16.log1p.ctg 62.46061
## NDSSName.my.fctrmyOther:PubDate.last16.log1p.ctg 62.46034
## PubDate.juliandate 62.46014
## NDSSName.my.fctr#Multimedia#:PubDate.last4.log1p.ctg 62.46001
## NDSSName.my.fctrForeign#World#:PubDate.last2.log1p.ctg 62.45953
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg 62.45895
## NDSSName.my.fctrmyOther:PubDate.last8.log1p.ctg 62.45872
## NDSSName.my.fctrmyOther:PubDate.last2.log1p.ctg 62.45866
## NDSSName.my.fctrForeign#World#:PubDate.last16.log1p.ctg 62.45805
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg 62.45737
## NDSSName.my.fctrTravel#Travel#:PubDate.last4.log1p.ctg 62.45737
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg 62.45604
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg 62.45255
## NDSSName.my.fctrmyOther:PubDate.last4.log1p.ctg 62.44734
## NDSSName.my.fctrForeign#World#:PubDate.last4.log1p.ctg 62.44664
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg 62.44657
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg 62.44630
## NDSSName.my.fctrForeign#World#:PubDate.last8.log1p.ctg 62.44232
## NDSSName.my.fctr#Multimedia#:PubDate.last2.log1p.ctg 62.44227
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last4.log1p.ctg 62.43957
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg 62.43364
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg 62.43340
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg 62.43254
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg 62.42546
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg 62.42450
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg 62.41912
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg 62.41764
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg 62.41722
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg 62.41424
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last2.log1p.ctg 62.41173
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg 62.40078
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg 62.40018
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg 62.39911
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg 62.39835
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg 62.39674
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg 62.37735
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg 62.37645
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg 62.33894
## PubDate.minute.fctr(29.5,44.2] 62.31764
## PubDate.wkday.fctr5 62.24996
## NDSSName.my.fctr#Multimedia# 62.18062
## NDSSName.my.fctrmyOther 62.14866
## NDSSName.my.fctrForeign#World# 62.11162
## NDSSName.my.fctrTStyle## 61.91481
## NDSSName.my.fctrForeign#World#AsiaPacific 61.85382
## NDSSName.my.fctr#U.S.#Education 61.72771
## NDSSName.my.fctrStyles##Fashion 61.38622
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.4.ctg 60.96200
## NDSSName.my.fctr#Opinion#RoomForDebate 60.55380
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.2.ctg 59.93848
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg 0.00000
## Final##rcv#glmnet.imp
## PubDate.day.minutes.poly.1 100.00000
## PubDate.day.minutes.poly.2 94.99861
## PubDate.day.minutes.poly.4 80.32362
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg 79.92646
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg 76.35014
## PubDate.day.minutes.poly.3 69.56878
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg 69.33717
## NDSSName.my.fctr#Opinion#ThePublicEditor 68.43180
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg 68.23193
## NDSSName.my.fctrScience#Health# 67.75545
## NDSSName.my.fctrOpEd#Opinion# 66.72750
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg 66.33201
## NDSSName.my.fctrBusiness#Crosswords/Games# 65.82644
## NDSSName.my.fctrStyles#U.S.# 65.72658
## PubDate.wkend 63.40967
## WordCount.log1p 63.35579
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg 62.71138
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg 62.70422
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg 62.69663
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.3.ctg 62.66614
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg 62.64524
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg 62.63968
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg 62.63671
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg 62.63125
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg 62.63024
## WordCount.root2 62.61255
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg 62.60862
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg 62.60155
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg 62.59625
## PubDate.last16.log1p 62.59083
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg 62.55716
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg 62.55673
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg 62.53825
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg 62.52533
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg 62.52431
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg 62.51548
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg 62.51309
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg 62.49862
## NDSSName.my.fctr##:PubDate.last32.log1p.ctg 62.49753
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg 62.49439
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg 62.49079
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg 62.48485
## PubDate.wkday.fctr1 62.47973
## NDSSName.my.fctr##:PubDate.last16.log1p.ctg 62.47326
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg 62.46260
## PubDate.last2.log1p 62.46192
## .rnorm 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology# 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture## 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts# 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region# 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrTravel#Travel# 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.last32.log1p.ctg 62.46064
## PubDate.date.fctr(13,19] 62.46064
## PubDate.date.fctr(19,25] 62.46064
## PubDate.date.fctr(25,31] 62.46064
## PubDate.date.fctr(7,13] 62.46064
## PubDate.day.minutes.poly.5 62.46064
## PubDate.hour.fctr(15.3,23] 62.46064
## PubDate.hour.fctr(7.67,15.3] 62.46064
## PubDate.last32.log1p 62.46064
## PubDate.last4.log1p 62.46064
## PubDate.last8.log1p 62.46064
## PubDate.minute.fctr(14.8,29.5] 62.46064
## PubDate.minute.fctr(44.2,59.1] 62.46064
## PubDate.month.fctr10 62.46064
## PubDate.month.fctr11 62.46064
## PubDate.month.fctr12 62.46064
## PubDate.second.fctr(14.8,29.5] 62.46064
## PubDate.second.fctr(29.5,44.2] 62.46064
## PubDate.second.fctr(44.2,59.1] 62.46064
## PubDate.wkday.fctr2 62.46064
## PubDate.wkday.fctr3 62.46064
## PubDate.wkday.fctr4 62.46064
## PubDate.wkday.fctr6 62.46064
## WordCount.nexp 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last16.log1p.ctg 62.46061
## NDSSName.my.fctrmyOther:PubDate.last16.log1p.ctg 62.46034
## PubDate.juliandate 62.46014
## NDSSName.my.fctr#Multimedia#:PubDate.last4.log1p.ctg 62.46001
## NDSSName.my.fctrForeign#World#:PubDate.last2.log1p.ctg 62.45953
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg 62.45895
## NDSSName.my.fctrmyOther:PubDate.last8.log1p.ctg 62.45872
## NDSSName.my.fctrmyOther:PubDate.last2.log1p.ctg 62.45866
## NDSSName.my.fctrForeign#World#:PubDate.last16.log1p.ctg 62.45805
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg 62.45737
## NDSSName.my.fctrTravel#Travel#:PubDate.last4.log1p.ctg 62.45737
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg 62.45604
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg 62.45255
## NDSSName.my.fctrmyOther:PubDate.last4.log1p.ctg 62.44734
## NDSSName.my.fctrForeign#World#:PubDate.last4.log1p.ctg 62.44664
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg 62.44657
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg 62.44630
## NDSSName.my.fctrForeign#World#:PubDate.last8.log1p.ctg 62.44232
## NDSSName.my.fctr#Multimedia#:PubDate.last2.log1p.ctg 62.44227
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last4.log1p.ctg 62.43957
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg 62.43364
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg 62.43340
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg 62.43254
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg 62.42546
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg 62.42450
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg 62.41912
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg 62.41764
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg 62.41722
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg 62.41424
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last2.log1p.ctg 62.41173
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg 62.40078
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg 62.40018
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg 62.39911
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg 62.39835
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg 62.39674
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg 62.37735
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg 62.37645
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg 62.33894
## PubDate.minute.fctr(29.5,44.2] 62.31764
## PubDate.wkday.fctr5 62.24996
## NDSSName.my.fctr#Multimedia# 62.18062
## NDSSName.my.fctrmyOther 62.14866
## NDSSName.my.fctrForeign#World# 62.11162
## NDSSName.my.fctrTStyle## 61.91481
## NDSSName.my.fctrForeign#World#AsiaPacific 61.85382
## NDSSName.my.fctr#U.S.#Education 61.72771
## NDSSName.my.fctrStyles##Fashion 61.38622
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.4.ctg 60.96200
## NDSSName.my.fctr#Opinion#RoomForDebate 60.55380
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.2.ctg 59.93848
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.2.ctg 0.00000
# Diagnostic plots for the final model (glb_fin_mdl_id) on the training
# observations. For binomial classification the probability threshold passed
# along is the "opt.prob.threshold.OOB" entry of glb_models_df for the selected
# model (glb_sel_mdl_id); otherwise no threshold argument is supplied.
if (glb_is_classification && glb_is_binomial) {
    glb_analytics_diag_plots(
        obs_df = glbObsTrn,
        mdl_id = glb_fin_mdl_id,
        prob_threshold = glb_models_df[glb_models_df$id == glb_sel_mdl_id,
                                       "opt.prob.threshold.OOB"]
    )
} else {
    glb_analytics_diag_plots(obs_df = glbObsTrn, mdl_id = glb_fin_mdl_id)
}
## Warning in glb_analytics_diag_plots(obs_df = glbObsTrn, mdl_id =
## glb_fin_mdl_id, : Limiting important feature scatter plots to 5 out of 33
## [1] "Min/Max Boundaries: "
## UniqueID Popular.fctr Popular.fctr.All.X..rcv.glmnet.prob
## 1 1065 N NA
## 2 4168 N 0.03207544
## 3 5647 N 0.10145544
## 4 302 N NA
## Popular.fctr.All.X..rcv.glmnet Popular.fctr.All.X..rcv.glmnet.err
## 1 <NA> NA
## 2 N FALSE
## 3 Y TRUE
## 4 <NA> NA
## Popular.fctr.All.X..rcv.glmnet.err.abs
## 1 NA
## 2 0.03207544
## 3 0.10145544
## 4 NA
## Popular.fctr.All.X..rcv.glmnet.is.acc
## 1 NA
## 2 TRUE
## 3 FALSE
## 4 NA
## Popular.fctr.Final..rcv.glmnet.prob Popular.fctr.Final..rcv.glmnet
## 1 0.04978385 N
## 2 0.01242209 N
## 3 0.12124692 Y
## 4 0.37492215 Y
## Popular.fctr.Final..rcv.glmnet.err
## 1 FALSE
## 2 FALSE
## 3 TRUE
## 4 TRUE
## Popular.fctr.Final..rcv.glmnet.err.abs
## 1 0.04978385
## 2 0.01242209
## 3 0.12124692
## 4 0.37492215
## Popular.fctr.Final..rcv.glmnet.is.acc
## 1 TRUE
## 2 TRUE
## 3 FALSE
## 4 FALSE
## Popular.fctr.Final..rcv.glmnet.accurate
## 1 TRUE
## 2 TRUE
## 3 FALSE
## 4 FALSE
## Popular.fctr.Final..rcv.glmnet.error .label
## 1 0.00000000 1065
## 2 0.00000000 4168
## 3 0.02124692 5647
## 4 0.27492215 302
## [1] "Inaccurate: "
## UniqueID Popular.fctr Popular.fctr.All.X..rcv.glmnet.prob
## 1 2182 Y 0.02635955
## 2 4352 Y NA
## 3 4721 Y 0.05256214
## 4 364 Y 0.04568909
## 5 172 Y NA
## 6 3554 Y NA
## Popular.fctr.All.X..rcv.glmnet Popular.fctr.All.X..rcv.glmnet.err
## 1 N TRUE
## 2 <NA> NA
## 3 N TRUE
## 4 N TRUE
## 5 <NA> NA
## 6 <NA> NA
## Popular.fctr.All.X..rcv.glmnet.err.abs
## 1 0.9736405
## 2 NA
## 3 0.9474379
## 4 0.9543109
## 5 NA
## 6 NA
## Popular.fctr.All.X..rcv.glmnet.is.acc
## 1 FALSE
## 2 NA
## 3 FALSE
## 4 FALSE
## 5 NA
## 6 NA
## Popular.fctr.Final..rcv.glmnet.prob Popular.fctr.Final..rcv.glmnet
## 1 0.01202850 N
## 2 0.02449434 N
## 3 0.02479309 N
## 4 0.03074256 N
## 5 0.03317925 N
## 6 0.03332152 N
## Popular.fctr.Final..rcv.glmnet.err
## 1 TRUE
## 2 TRUE
## 3 TRUE
## 4 TRUE
## 5 TRUE
## 6 TRUE
## Popular.fctr.Final..rcv.glmnet.err.abs
## 1 0.9879715
## 2 0.9755057
## 3 0.9752069
## 4 0.9692574
## 5 0.9668207
## 6 0.9666785
## Popular.fctr.Final..rcv.glmnet.is.acc
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## Popular.fctr.Final..rcv.glmnet.accurate
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## Popular.fctr.Final..rcv.glmnet.error
## 1 -0.08797150
## 2 -0.07550566
## 3 -0.07520691
## 4 -0.06925744
## 5 -0.06682075
## 6 -0.06667848
## UniqueID Popular.fctr Popular.fctr.All.X..rcv.glmnet.prob
## 200 3491 N NA
## 231 4699 N NA
## 400 1051 N NA
## 432 125 N 0.1226019
## 952 4536 N NA
## 1162 5151 N NA
## Popular.fctr.All.X..rcv.glmnet Popular.fctr.All.X..rcv.glmnet.err
## 200 <NA> NA
## 231 <NA> NA
## 400 <NA> NA
## 432 Y TRUE
## 952 <NA> NA
## 1162 <NA> NA
## Popular.fctr.All.X..rcv.glmnet.err.abs
## 200 NA
## 231 NA
## 400 NA
## 432 0.1226019
## 952 NA
## 1162 NA
## Popular.fctr.All.X..rcv.glmnet.is.acc
## 200 NA
## 231 NA
## 400 NA
## 432 FALSE
## 952 NA
## 1162 NA
## Popular.fctr.Final..rcv.glmnet.prob Popular.fctr.Final..rcv.glmnet
## 200 0.1086826 Y
## 231 0.1113771 Y
## 400 0.1268319 Y
## 432 0.1304708 Y
## 952 0.2860456 Y
## 1162 0.6958676 Y
## Popular.fctr.Final..rcv.glmnet.err
## 200 TRUE
## 231 TRUE
## 400 TRUE
## 432 TRUE
## 952 TRUE
## 1162 TRUE
## Popular.fctr.Final..rcv.glmnet.err.abs
## 200 0.1086826
## 231 0.1113771
## 400 0.1268319
## 432 0.1304708
## 952 0.2860456
## 1162 0.6958676
## Popular.fctr.Final..rcv.glmnet.is.acc
## 200 FALSE
## 231 FALSE
## 400 FALSE
## 432 FALSE
## 952 FALSE
## 1162 FALSE
## Popular.fctr.Final..rcv.glmnet.accurate
## 200 FALSE
## 231 FALSE
## 400 FALSE
## 432 FALSE
## 952 FALSE
## 1162 FALSE
## Popular.fctr.Final..rcv.glmnet.error
## 200 0.008682565
## 231 0.011377070
## 400 0.026831924
## 432 0.030470810
## 952 0.186045609
## 1162 0.595867596
## UniqueID Popular.fctr Popular.fctr.All.X..rcv.glmnet.prob
## 1244 1241 N 0.8753344
## 1245 6235 N NA
## 1246 4943 N NA
## 1247 2995 N NA
## 1248 1612 N 0.9115272
## 1249 3590 N NA
## Popular.fctr.All.X..rcv.glmnet Popular.fctr.All.X..rcv.glmnet.err
## 1244 Y TRUE
## 1245 <NA> NA
## 1246 <NA> NA
## 1247 <NA> NA
## 1248 Y TRUE
## 1249 <NA> NA
## Popular.fctr.All.X..rcv.glmnet.err.abs
## 1244 0.8753344
## 1245 NA
## 1246 NA
## 1247 NA
## 1248 0.9115272
## 1249 NA
## Popular.fctr.All.X..rcv.glmnet.is.acc
## 1244 FALSE
## 1245 NA
## 1246 NA
## 1247 NA
## 1248 FALSE
## 1249 NA
## Popular.fctr.Final..rcv.glmnet.prob Popular.fctr.Final..rcv.glmnet
## 1244 0.9201706 Y
## 1245 0.9263802 Y
## 1246 0.9327631 Y
## 1247 0.9344071 Y
## 1248 0.9489711 Y
## 1249 0.9586294 Y
## Popular.fctr.Final..rcv.glmnet.err
## 1244 TRUE
## 1245 TRUE
## 1246 TRUE
## 1247 TRUE
## 1248 TRUE
## 1249 TRUE
## Popular.fctr.Final..rcv.glmnet.err.abs
## 1244 0.9201706
## 1245 0.9263802
## 1246 0.9327631
## 1247 0.9344071
## 1248 0.9489711
## 1249 0.9586294
## Popular.fctr.Final..rcv.glmnet.is.acc
## 1244 FALSE
## 1245 FALSE
## 1246 FALSE
## 1247 FALSE
## 1248 FALSE
## 1249 FALSE
## Popular.fctr.Final..rcv.glmnet.accurate
## 1244 FALSE
## 1245 FALSE
## 1246 FALSE
## 1247 FALSE
## 1248 FALSE
## 1249 FALSE
## Popular.fctr.Final..rcv.glmnet.error
## 1244 0.8201706
## 1245 0.8263802
## 1246 0.8327631
## 1247 0.8344071
## 1248 0.8489711
## 1249 0.8586294
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
# Collect the ids of all features that have a non-missing value in at least one
# column of glb_feats_df whose name contains the literal text ".imp".
dsp_feats_vctr <- c(NULL)
for (var in grep(".imp", names(glb_feats_df), fixed = TRUE, value = TRUE)) {
    dsp_feats_vctr <- union(
        dsp_feats_vctr,
        glb_feats_df[!is.na(glb_feats_df[, var]), "id"]
    )
}
# print(glbObsTrn[glbObsTrn$UniqueID %in% FN_OOB_ids,
#                 grep(glb_rsp_var, names(glbObsTrn), value=TRUE)])

# Show which columns exist in glbObsTrn but are still missing from glbObsAll.
print(setdiff(names(glbObsTrn),
              names(glbObsAll)))
## [1] "Popular.fctr.Final..rcv.glmnet.prob"
## [2] "Popular.fctr.Final..rcv.glmnet"
## [3] "Popular.fctr.Final..rcv.glmnet.err"
## [4] "Popular.fctr.Final..rcv.glmnet.err.abs"
## [5] "Popular.fctr.Final..rcv.glmnet.is.acc"
# Copy every column that exists only in glbObsTrn into the "Train" rows of
# glbObsAll. The copy is positional (row k of glbObsTrn goes to the k-th
# "Train" row of glbObsAll), so it is only valid when both have the same
# number of rows in the same order.
# NOTE(review): an id-based merge on glb_id_var would remove the ordering
# assumption (the original "Merge or cbind ?" question) -- confirm with callers.
for (col in setdiff(names(glbObsTrn), names(glbObsAll))) {
    # which() drops NA comparisons; a raw logical mask containing NA is
    # rejected by `[<-.data.frame`. `[[` avoids `$` partial name matching.
    trn_rows <- which(glbObsAll[[".src"]] == "Train")
    # Fail loudly instead of letting the replacement be recycled or misaligned
    stopifnot(length(trn_rows) == nrow(glbObsTrn))
    glbObsAll[trn_rows, col] <- glbObsTrn[, col]
}
# Show which columns exist in glbObsFit but are missing from glbObsAll.
print(setdiff(names(glbObsFit), names(glbObsAll)))
## character(0)
# Show which columns exist in glbObsOOB but are missing from glbObsAll.
print(setdiff(names(glbObsOOB),
              names(glbObsAll)))
## character(0)
# Copy every column that exists only in glbObsOOB into the "OOB" rows of
# glbObsAll. The copy is positional, so it is only valid when glbObsOOB has
# the same number of rows, in the same order, as the "OOB" rows of glbObsAll.
# NOTE(review): an id-based merge on glb_id_var would remove the ordering
# assumption (the original "Merge or cbind ?" question) -- confirm with callers.
for (col in setdiff(names(glbObsOOB), names(glbObsAll))) {
    # which() drops NA comparisons; presumably .lcn is NA for rows that are
    # neither Fit nor OOB (TODO confirm), and a logical mask containing NA is
    # rejected by `[<-.data.frame`. `[[` avoids `$` partial name matching.
    oob_rows <- which(glbObsAll[[".lcn"]] == "OOB")
    # Fail loudly instead of letting the replacement be recycled or misaligned
    stopifnot(length(oob_rows) == nrow(glbObsOOB))
    glbObsAll[oob_rows, col] <- glbObsOOB[, col]
}
# Show which columns exist in glbObsNew but are missing from glbObsAll.
print(setdiff(names(glbObsNew), names(glbObsAll)))
## character(0)
# When glb_save_envir is set, persist the feature table, the combined
# observations, the model tables/lists and the selected and final models to
# "<glb_out_pfx>dsk.RData". Objects are named explicitly via `list =`.
if (glb_save_envir) {
    save(list = c("glb_feats_df", "glbObsAll",
                  #"glbObsTrn", "glbObsFit", "glbObsOOB", "glbObsNew",
                  "glb_models_df", "dsp_models_df", "glb_models_lst",
                  "glb_model_type",
                  "glb_sel_mdl", "glb_sel_mdl_id",
                  "glb_fin_mdl", "glb_fin_mdl_id"),
         file = paste0(glb_out_pfx, "dsk.RData"))
}
# Append "data.training.all.prediction" and "model.final" to the list of
# available analytics objects, then call replay.petrisim() on glb_analytics_pn
# with the updated list.
# The list is updated in its own statement: an assignment embedded in a call
# argument only takes effect if and when the callee forces that (lazily
# evaluated) argument, which hides the state change from the reader.
glb_analytics_avl_objs <- c(glb_analytics_avl_objs,
                            "data.training.all.prediction", "model.final")
replay.petrisim(pn = glb_analytics_pn,
                replay.trans = glb_analytics_avl_objs,
                flip_coord = TRUE)
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
## 2.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction data.new.prediction firing: model.selected
## 3.0000 3 0 2 1 0
## 3.0000 multiple enabled transitions: model.final data.training.all.prediction data.new.prediction firing: data.training.all.prediction
## 4.0000 5 0 1 1 1
## 4.0000 multiple enabled transitions: model.final data.training.all.prediction data.new.prediction firing: model.final
## 5.0000 4 0 0 2 1
# Register the "predict.data.new" chunk in glb_chunks_df with major.inc = TRUE
# (the printed log below shows step_major moving from 7 to 8).
glb_chunks_df <- myadd_chunk(
    glb_chunks_df,
    "predict.data.new",
    major.inc = TRUE
)
## label step_major step_minor label_minor bgn end
## 15 fit.data.training 7 1 1 804.100 814.508
## 16 predict.data.new 8 0 0 814.509 NA
## elapsed
## 15 10.409
## 16 NA
8.0: predict data new
# Compute final model predictions
#glb2Sav(); all.equal(savObsAll, glbObsAll); all.equal(savObsTrn, glbObsTrn); all.equal(savObsNew, glbObsNew)
# Default probability threshold: for binomial classification take the selected
# model's "opt.prob.threshold.OOB" entry from glb_models_df, otherwise NULL.
prob_threshold_def <-
  if (glb_is_classification && glb_is_binomial) {
    glb_models_df[glb_models_df$id == glb_sel_mdl_id, "opt.prob.threshold.OOB"]
  } else {
    NULL
  }

# Run glb_get_predictions with the final model on each listed observation set
# and store the result back into the matching global data frame.
for (obsSet in c("trn", "new")) {
  obs_df <- switch(obsSet, all = glbObsAll, trn = glbObsTrn, new = glbObsNew)
  obs_df <- glb_get_predictions(obs_df, mdl_id = glb_fin_mdl_id,
                                rsp_var = glb_rsp_var,
                                prob_threshold_def = prob_threshold_def)
  switch(obsSet,
         all = { glbObsAll <- obs_df },
         trn = { glbObsTrn <- obs_df },
         new = { glbObsNew <- obs_df })
}
## Warning in glb_get_predictions(obs_df, mdl_id = glb_fin_mdl_id, rsp_var =
## glb_rsp_var, : Using default probability threshold: 0.1
## Warning in glb_get_predictions(obs_df, mdl_id = glb_fin_mdl_id, rsp_var =
## glb_rsp_var, : Using default probability threshold: 0.1
# Drop the scratch data frame left over from the prediction loop.
rm(obs_df)
# Rebuild glbObsAll by row-binding the training and new observation sets and
# ordering the result by glb_id_var.
glbObsAll <- orderBy(reformulate(glb_id_var), myrbind_df(glbObsTrn, glbObsNew))
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
## Warning in zoo(rval[i], index(x)[i]): some methods for "zoo" objects do not
## work if the index entries in 'order.by' are not unique
# Diagnostic plots for the final model on the new observations, passing the
# default probability threshold computed above.
glb_analytics_diag_plots(obs_df = glbObsNew, mdl_id = glb_fin_mdl_id,
prob_threshold = prob_threshold_def)
## Warning in glb_analytics_diag_plots(obs_df = glbObsNew, mdl_id =
## glb_fin_mdl_id, : Limiting important feature scatter plots to 5 out of 33
## Warning: Removed 1870 rows containing missing values (geom_point).
## Warning: Removed 1870 rows containing missing values (geom_point).
## Warning: Removed 1870 rows containing missing values (geom_point).
## Warning: Removed 1870 rows containing missing values (geom_point).
## Warning: Removed 1870 rows containing missing values (geom_point).
## Warning: Removed 1870 rows containing missing values (geom_point).
## Warning: Removed 1870 rows containing missing values (geom_point).
## Warning: Removed 1870 rows containing missing values (geom_point).
## Warning: Removed 1870 rows containing missing values (geom_point).
## Warning: Removed 1870 rows containing missing values (geom_point).
## NULL
# Pick the observation set to export: the new observations by default,
# otherwise the set named by glb_out_obs ("all", "trn" or "new").
obs_df <-
  if (is.null(glb_out_obs)) {
    glbObsNew
  } else {
    switch(glb_out_obs, all = glbObsAll, trn = glbObsTrn, new = glbObsNew)
  }
require(stringr)
## Loading required package: stringr
# Start the output frame with the id column(s) only, then add one column per
# entry of glb_out_vars_lst.
obsout_df <- obs_df[, glb_id_var, drop = FALSE]
for (clmn in names(glb_out_vars_lst)) {
  if (grepl("^%<d-%", glb_out_vars_lst[[clmn]])) {
    # Entries starting with "%<d-%" hold R code: the text after the marker is
    # evaluated and its value is used to select the source column.
    feat <- str_trim(unlist(strsplit(glb_out_vars_lst[[clmn]], "%<d-%"))[2])
    obsout_df[, clmn] <- obs_df[, eval(parse(text = feat))]
  } else {
    # Plain entries name the source column directly.
    obsout_df[, clmn] <- obs_df[, glb_out_vars_lst[[clmn]]]
  }
}
if (glb_is_classification) {
# Name of the final model's predicted-response column. Built with the same
# two-argument mygetPredictIds() call that the rest of this section uses to
# address that column in glbObsNew.
rsp_var_out <- mygetPredictIds(glb_rsp_var, glb_fin_mdl_id)$value
if (".grpid" %in% names(glbObsNew)) {
  # Dups were found in glbObsAll
  # Keep only the new observations that belong to a duplicate group and attach
  # the group-level data plus the exported output columns.
  tmp_newobs_df <- subset(glbObsNew[, c(glb_id_var, ".grpid", rsp_var_out)],
                          !is.na(.grpid))
  tmp_newobs_df <-
    merge(tmp_newobs_df, dupgrps_df, by = ".grpid", all.x = TRUE)
  tmp_newobs_df <-
    merge(tmp_newobs_df, obsout_df, by = glb_id_var, all.x = TRUE)
  # NOTE(review): Probability1, sold.0 and sold.1 are hard-coded column names;
  # presumably sold.<k> are per-group class counts in dupgrps_df carried over
  # from another dataset -- confirm they exist for this response variable.
  # A row is flagged when the predicted probability disagrees with a class
  # that has a positive count in its duplicate group.
  tmp_newobs_df$.err <-
    ((tmp_newobs_df$Probability1 > 0.5) & (tmp_newobs_df$sold.0 > 0) |
       (tmp_newobs_df$Probability1 < 0.5) & (tmp_newobs_df$sold.1 > 0))
  # Order by the configured id variable rather than a hard-coded UniqueID.
  tmp_newobs_df <- orderBy(reformulate(glb_id_var),
                           subset(tmp_newobs_df, .err == TRUE))
  print(sprintf("ObsNew Prediction errors in duplicates: %d",
                nrow(tmp_newobs_df)))
  print(tmp_newobs_df)
}
# Check for prediction errors based on category X glb_rsp_var distribution:
# a class predicted for new observations in a category where that class has
# no training observations is reported as a likely error.

# Training counts per category and actual class, one ".n.Trn.<class>" column
# per class.
TrnLvlCategory <-
  mycreate_sqlxtab_df(glbObsTrn, c(glbFeatsCategory, glb_rsp_var)) %>%
  tidyr::spread_(glb_rsp_var, ".n")
names(TrnLvlCategory)[2:ncol(TrnLvlCategory)] <-
  paste(".n.Trn.", names(TrnLvlCategory)[2:ncol(TrnLvlCategory)], sep = "")
glbLvlCategory <-
  merge(glbLvlCategory, TrnLvlCategory, by = glbFeatsCategory, all.x = TRUE)

# New-observation counts per category and predicted class, one
# ".n.New.<class>" column per class that was actually predicted.
predctId <- mygetPredictIds(glb_rsp_var, glb_fin_mdl_id)$value
NewLvlCategory <-
  mycreate_sqlxtab_df(glbObsNew, c(glbFeatsCategory, predctId)) %>%
  tidyr::spread_(predctId, ".n")
names(NewLvlCategory)[2:ncol(NewLvlCategory)] <-
  paste(".n.New.", names(NewLvlCategory)[2:ncol(NewLvlCategory)], sep = "")
glbLvlCategory <-
  merge(glbLvlCategory, NewLvlCategory, by = glbFeatsCategory, all.x = TRUE)

# Only categories with at least one missing count can contain an error.
tmpLvlCategory <- glbLvlCategory[, c(glbFeatsCategory,
  grep("\\.n\\.(.+)\\.", names(glbLvlCategory), value = TRUE))]
tmpLvlCategory <- tmpLvlCategory[rowSums(is.na(tmpLvlCategory)) > 0, ]

errLvlIx <- c(NULL)
for (clss in unique(glbObsTrn[, glb_rsp_var])) {
  newClmn <- paste0(".n.New.", as.character(clss))
  trnClmn <- paste0(".n.Trn.", as.character(clss))
  # A class that is never predicted for the new observations has no
  # ".n.New.<class>" column after spreading. It cannot contribute errors, and
  # indexing it would stop with "undefined columns selected".
  if (!all(c(newClmn, trnClmn) %in% names(tmpLvlCategory))) next
  errLvlIx <- union(errLvlIx,
                    which(!is.na(tmpLvlCategory[, newClmn]) &
                            (tmpLvlCategory[, newClmn] > 0) &
                            is.na(tmpLvlCategory[, trnClmn])))
}
if (length(errLvlIx) > 0) {
  print("ObsNew Prediction errors in categories:")
  print(tmpLvlCategory[errLvlIx, ])
  print(colSums(tmpLvlCategory[errLvlIx, -1], na.rm = TRUE))
  # By definition .err columns will be NA !!!
  #predctId <- mygetPredictIds(glb_rsp_var, glb_fin_mdl_id)$value; errObsNew <- glbObsNew[(glbObsNew[, glbFeatsCategory] %in% tmpLvlCategory[, 1]), ];
  #myprint_df(errObsNew[(errObsNew[, glbFeatsCategory] %in% tmpLvlCategory[errLvlIx, glbFeatsCategory]) & (errObsNew[, predctId] == "Y"), union(c(glb_id_var, glbFeatsCategory, predctId), myextract_actual_feats(row.names(glb_featsimp_df[glb_featsimp_df[, paste0(glb_fin_mdl_id, ".imp")] > 10, ])))])
  #myprint_df(errObsNew[(errObsNew[, glbFeatsCategory] %in% "Foreign#World#") & (errObsNew[, predctId] == "Y"), union(c(glb_id_var, glbFeatsCategory, predctId), myextract_actual_feats(row.names(glb_featsimp_df[glb_featsimp_df[, paste0(glb_fin_mdl_id, ".imp")] > 10, ])))])
}
# New observations with the exported "Probability1" column appended.
tmp_newobs_df <- cbind(glbObsNew, obsout_df[, "Probability1", drop = FALSE])
# Check predictions that are outside of data ranges
#stop(here")
require(stringr)
# Candidate features: not near-zero-variance, not excluded, and without
# ".fctr" in the id.
tmp_feats_df <- subset(glb_feats_df,
                       !nzv &
                         (exclude.as.feat != 1) &
                         !grepl(".fctr", id, fixed = TRUE))[, "id", drop = FALSE]
# Overall min/max of every candidate feature across glbObsAll, reshaped to one
# row per feature id with "max" and "min" columns.
ranges_all_df <- dplyr::summarise_each(glbObsAll[, tmp_feats_df$id],
                                       funs(min(., na.rm = TRUE),
                                            max(., na.rm = TRUE)))
ranges_all_df <- tidyr::gather(ranges_all_df)
# Summary column names end in "_min" / "_max": the last three characters are
# the statistic, everything before the 4-character suffix is the feature id.
ranges_all_df <- dplyr::mutate(ranges_all_df,
                               id = str_sub(key, 1, -5),
                               stat = str_sub(key, -3))
ranges_all_df <- dplyr::select(ranges_all_df, -key)
ranges_all_df <- tidyr::spread(ranges_all_df, stat, value)
# sav_ranges_trn_df <- ranges_trn_df; all.equal(sav_ranges_trn_df, ranges_trn_df)
# sav_ranges_new_df <- ranges_new_df; all.equal(sav_ranges_new_df, ranges_new_df)
# Per-class min/max of each feature.
#
# obs_df:    data frame holding the class column and the feature columns
# feats:     names of the feature columns to summarise
# class_var: name of the column that defines the classes
#
# Returns one row per feature id with one column per statistic/class pair,
# named "<min|max>.<class_var with "*" replaced by "_">.<class>".
get_ranges_df <- function(obs_df, feats, class_var) {
  require(tidyr)

  # Class column first, under a name with every "*" replaced by "_"
  tmpClass <- gsub("\\*", "_", class_var)
  tmpObs <- data.frame(.tmpClass = obs_df[, class_var])
  names(tmpObs) <- tmpClass
  tmpObs <- cbind(tmpObs, obs_df[, feats, drop = FALSE])

  # min and max of every feature within each class
  byClass <- dplyr::group_by_(tmpObs, tmpClass)
  wide <- dplyr::summarise_each(byClass,
                                funs(min(., na.rm = TRUE),
                                     max(., na.rm = TRUE)))

  # Long form: split "<feature>_<stat>" keys into the feature id and a
  # "<stat>.<class_var>" prefix, then append the class level to that prefix
  long <- tidyr::gather(wide, key, value, -1)
  long <- mutate(long,
                 id = str_sub(key, 1, -5),
                 stat.vname = paste0(str_sub(key, -3), ".", tmpClass))
  long <- unite_(long, "stat.class", c("stat.vname", tmpClass), sep = ".")
  long <- dplyr::select(long, -key)

  # Back to wide: one column per statistic/class combination
  ranges_df <- spread(long, stat.class, value)
  ranges_df
}
# Predicted-response column names: selected model for OOB, final model for new
rsp_var_out_OOB <- mygetPredictIds(glb_rsp_var, glb_sel_mdl_id)$value
rsp_var_out_new <- mygetPredictIds(glb_rsp_var, glb_fin_mdl_id)$value

# Reference ranges are grouped by the actual response (Trn, Fit)
ranges_trn_df <- get_ranges_df(obs_df = glbObsTrn,
                               feats = tmp_feats_df$id,
                               class_var = glb_rsp_var)
ranges_fit_df <- get_ranges_df(obs_df = glbObsFit,
                               feats = tmp_feats_df$id,
                               class_var = glb_rsp_var)
# Ranges under test are grouped by the predicted response (OOB, New)
ranges_OOB_df <- get_ranges_df(obs_df = glbObsOOB,
                               feats = tmp_feats_df$id,
                               class_var = rsp_var_out_OOB)
ranges_new_df <- get_ranges_df(obs_df = glbObsNew,
                               feats = tmp_feats_df$id,
                               class_var = rsp_var_out_new)
# Report observations whose feature values fall outside the per-class range of
# the reference data: OOB predictions are compared against the Fit ranges and
# new-observation predictions against the Trn ranges.
for (obsset in c("OOB", "new")) {
  if (obsset == "OOB") {
    ranges_ref_df <- ranges_fit_df; obs_df <- glbObsOOB;
    rsp_var_out_obs <- rsp_var_out_OOB; sprintf_pfx <- "OOBobs";
  } else {
    ranges_ref_df <- ranges_trn_df; obs_df <- glbObsNew;
    rsp_var_out_obs <- rsp_var_out_new; sprintf_pfx <- "newobs";
  }
  # One row per feature: feature metadata, overall min/max, and the per-class
  # min/max columns of the reference, OOB and new sets. Features without an
  # overall min and the ".rnorm" feature are dropped.
  plt_feats_df <- glb_feats_df %>%
    merge(ranges_all_df, all = TRUE) %>%
    merge(ranges_ref_df, all = TRUE) %>%
    merge(ranges_OOB_df, all = TRUE) %>%
    merge(ranges_new_df, all = TRUE) %>%
    subset(!is.na(min) & (id != ".rnorm"))
  row.names(plt_feats_df) <- plt_feats_df$id
  range_outlier_ids <- c(NULL)
  for (clss in unique(obs_df[, rsp_var_out_obs])) {
    # Range columns are named with "*" replaced by "_" (see get_ranges_df)
    tmp_rsp_var_out_obs <- gsub("\\*", "_", rsp_var_out_obs)
    for (stat in c("min", "max")) {
      # Features whose range in this predicted class extends beyond the
      # reference range of the same actual class
      if (stat == "min") {
        dsp_feats <- plt_feats_df[
          which(plt_feats_df[, paste("min", tmp_rsp_var_out_obs, clss, sep = ".")] <
                  plt_feats_df[, paste("min", glb_rsp_var, clss, sep = ".")]),
          "id"]
      } else {
        dsp_feats <- plt_feats_df[
          which(plt_feats_df[, paste("max", tmp_rsp_var_out_obs, clss, sep = ".")] >
                  plt_feats_df[, paste("max", glb_rsp_var, clss, sep = ".")]),
          "id"]
      }
      if (length(dsp_feats) > 0) {
        ths_ids <- c(NULL)
        for (feat in dsp_feats) {
          # which() drops rows where the comparison is NA (missing feature
          # value). A raw logical mask would select an NA row for each of them
          # and add an NA id to the outlier set.
          if (stat == "min") {
            ths_ids <- union(ths_ids,
              obs_df[which((obs_df[, rsp_var_out_obs] == clss) &
                             (obs_df[, feat] <
                                plt_feats_df[plt_feats_df$id == feat,
                                  paste("min", glb_rsp_var, clss, sep = ".")])),
                     glb_id_var])
          } else {
            ths_ids <- union(ths_ids,
              obs_df[which((obs_df[, rsp_var_out_obs] == clss) &
                             (obs_df[, feat] >
                                plt_feats_df[plt_feats_df$id == feat,
                                  paste("max", glb_rsp_var, clss, sep = ".")])),
                     glb_id_var])
          }
        }
        # Show the offending observations with only the out-of-range features
        tmp_obs_df <- obs_df[obs_df[, glb_id_var] %in% ths_ids,
                             c(glb_id_var, rsp_var_out_obs, dsp_feats)]
        if (stat == "min") {
          print(sprintf("%s %s %s: min < min of Train range: %d",
                        sprintf_pfx, rsp_var_out_obs, clss, nrow(tmp_obs_df)))
        } else {
          print(sprintf("%s %s %s: max > max of Train range: %d",
                        sprintf_pfx, rsp_var_out_obs, clss, nrow(tmp_obs_df)))
        }
        myprint_df(tmp_obs_df)
        print(subset(plt_feats_df, id %in% dsp_feats))
        range_outlier_ids <- union(range_outlier_ids, ths_ids)
      }
    }
  }
  print(sprintf("%s total range outliers: %d", sprintf_pfx,
                length(range_outlier_ids)))
}
}
## [1] "ObsNew Prediction errors in categories:"
## NDSSName.my.fctr .n.Trn.N .n.Trn.Y .n.New.N .n.New.Y
## 5 #U.S.#Education 325 NA 85 4
## 10 Culture## 1 NA 51 19
## 12 Foreign#World# 172 NA 46 1
## 21 myOther 38 NA 4 1
## .n.Trn.N .n.Trn.Y .n.New.N .n.New.Y
## 536 0 186 25
## Loading required package: tidyr
##
## Attaching package: 'tidyr'
##
## The following object is masked from 'package:Matrix':
##
## expand
## [1] "OOBobs Popular.fctr.All.X..rcv.glmnet Y: min < min of Train range: 7"
## UniqueID Popular.fctr.All.X..rcv.glmnet
## 5431 5431 Y
## 1906 1906 Y
## 2645 2645 Y
## 6435 6435 Y
## 1767 1767 Y
## 4223 4223 Y
## 1930 1930 Y
## PubDate.day.minutes.poly.1.ctg PubDate.day.minutes.poly.2
## 5431 0.18988524 -0.003298637
## 1906 -0.28422208 -0.007994015
## 2645 0.04444564 0.012594250
## 6435 -0.02734665 0.002105741
## 1767 -0.02245774 -0.004384985
## 4223 -0.04543407 -0.008758791
## 1930 0.02392413 -0.007891784
## PubDate.day.minutes.poly.2.ctg PubDate.day.minutes.poly.4
## 5431 0.160017937 -0.004425843
## 1906 0.189926123 0.006563830
## 2645 0.035996050 -0.018240126
## 6435 -0.017879840 -0.010140008
## 1767 -0.201374531 -0.002340509
## 4223 -0.006162687 0.009518864
## 1930 -0.159715310 0.008326544
## PubDate.day.minutes.poly.4.ctg PubDate.day.minutes.poly.5.ctg
## 5431 -0.40164220 -0.54313226
## 1906 -0.30249370 0.41093522
## 2645 -0.01082616 -0.03079946
## 6435 0.02178124 -0.02033356
## 1767 0.33305455 -0.13083307
## 4223 -0.02181201 -0.01124082
## 1930 0.19588297 0.13994102
## WordCount.log1p WordCount.root2
## 5431 7.295056 38.36665
## 1906 7.160846 35.87478
## 2645 6.635947 27.58623
## 6435 0.000000 0.00000
## 1767 6.979145 32.75668
## 4223 7.274480 37.97368
## 1930 5.541264 15.93738
## id cor.y
## PubDate.day.minutes.poly.1.ctg PubDate.day.minutes.poly.1.ctg -0.002432289
## PubDate.day.minutes.poly.2 PubDate.day.minutes.poly.2 0.070977720
## PubDate.day.minutes.poly.2.ctg PubDate.day.minutes.poly.2.ctg 0.003596414
## PubDate.day.minutes.poly.4 PubDate.day.minutes.poly.4 0.073941394
## PubDate.day.minutes.poly.4.ctg PubDate.day.minutes.poly.4.ctg 0.014601521
## PubDate.day.minutes.poly.5.ctg PubDate.day.minutes.poly.5.ctg 0.014574775
## WordCount.log1p WordCount.log1p 0.254319628
## WordCount.root2 WordCount.root2 0.292120679
## exclude.as.feat cor.y.abs cor.high.X
## PubDate.day.minutes.poly.1.ctg FALSE 0.002432289 <NA>
## PubDate.day.minutes.poly.2 FALSE 0.070977720 <NA>
## PubDate.day.minutes.poly.2.ctg FALSE 0.003596414 <NA>
## PubDate.day.minutes.poly.4 FALSE 0.073941394 <NA>
## PubDate.day.minutes.poly.4.ctg FALSE 0.014601521 <NA>
## PubDate.day.minutes.poly.5.ctg FALSE 0.014574775 <NA>
## WordCount.log1p FALSE 0.254319628 WordCount.root2
## WordCount.root2 FALSE 0.292120679 <NA>
## freqRatio percentUnique zeroVar nzv
## PubDate.day.minutes.poly.1.ctg 1.083333 53.96509 FALSE FALSE
## PubDate.day.minutes.poly.2 1.225490 18.08022 FALSE FALSE
## PubDate.day.minutes.poly.2.ctg 1.083333 53.94979 FALSE FALSE
## PubDate.day.minutes.poly.4 1.225490 18.08022 FALSE FALSE
## PubDate.day.minutes.poly.4.ctg 1.083333 53.94979 FALSE FALSE
## PubDate.day.minutes.poly.5.ctg 1.083333 53.94979 FALSE FALSE
## WordCount.log1p 2.315789 24.15799 FALSE FALSE
## WordCount.root2 2.315789 24.15799 FALSE FALSE
## is.cor.y.abs.low interaction.feat
## PubDate.day.minutes.poly.1.ctg TRUE NDSSName.my.fctr
## PubDate.day.minutes.poly.2 FALSE <NA>
## PubDate.day.minutes.poly.2.ctg TRUE NDSSName.my.fctr
## PubDate.day.minutes.poly.4 FALSE <NA>
## PubDate.day.minutes.poly.4.ctg FALSE NDSSName.my.fctr
## PubDate.day.minutes.poly.5.ctg FALSE NDSSName.my.fctr
## WordCount.log1p FALSE <NA>
## WordCount.root2 FALSE <NA>
## shapiro.test.p.value rsp_var_raw id_var
## PubDate.day.minutes.poly.1.ctg 1.051535e-45 FALSE NA
## PubDate.day.minutes.poly.2 8.020999e-64 FALSE NA
## PubDate.day.minutes.poly.2.ctg 2.302769e-65 FALSE NA
## PubDate.day.minutes.poly.4 1.523136e-47 FALSE NA
## PubDate.day.minutes.poly.4.ctg 2.214419e-67 FALSE NA
## PubDate.day.minutes.poly.5.ctg 7.171204e-67 FALSE NA
## WordCount.log1p 1.576866e-49 FALSE NA
## WordCount.root2 4.556481e-30 FALSE NA
## rsp_var max min
## PubDate.day.minutes.poly.1.ctg NA 0.48127714 -0.707011442
## PubDate.day.minutes.poly.2 NA 0.04268445 -0.008758791
## PubDate.day.minutes.poly.2.ctg NA 0.75539456 -0.221260607
## PubDate.day.minutes.poly.4 NA 0.06677441 -0.018327397
## PubDate.day.minutes.poly.4.ctg NA 0.67700049 -0.611884133
## PubDate.day.minutes.poly.5.ctg NA 0.56286316 -0.716534449
## WordCount.log1p NA 9.29771002 0.000000000
## WordCount.root2 NA 104.46051886 0.000000000
## max.Popular.fctr.N max.Popular.fctr.Y
## PubDate.day.minutes.poly.1.ctg 0.35968907 0.32950245
## PubDate.day.minutes.poly.2 0.04268445 0.04254377
## PubDate.day.minutes.poly.2.ctg 0.75539456 0.43056671
## PubDate.day.minutes.poly.4 0.06543120 0.06149053
## PubDate.day.minutes.poly.4.ctg 0.67700049 0.28961875
## PubDate.day.minutes.poly.5.ctg 0.56286316 0.21585241
## WordCount.log1p 8.81966535 9.29771002
## WordCount.root2 82.24962006 104.46051886
## min.Popular.fctr.N min.Popular.fctr.Y
## PubDate.day.minutes.poly.1.ctg -0.399776908 -0.245703803
## PubDate.day.minutes.poly.2 -0.008758791 -0.008758717
## PubDate.day.minutes.poly.2.ctg -0.221260607 -0.155122711
## PubDate.day.minutes.poly.4 -0.018327397 -0.018219595
## PubDate.day.minutes.poly.4.ctg -0.611884133 -0.282432189
## PubDate.day.minutes.poly.5.ctg -0.716534449 -0.370586479
## WordCount.log1p 0.000000000 1.945910149
## WordCount.root2 0.000000000 2.449489743
## max.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg 0.48127714
## PubDate.day.minutes.poly.2 0.04268445
## PubDate.day.minutes.poly.2.ctg 0.74739148
## PubDate.day.minutes.poly.4 0.06213811
## PubDate.day.minutes.poly.4.ctg 0.49508199
## PubDate.day.minutes.poly.5.ctg 0.48962874
## WordCount.log1p 7.65112018
## WordCount.root2 45.84757355
## max.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg 0.18988524
## PubDate.day.minutes.poly.2 0.04254377
## PubDate.day.minutes.poly.2.ctg 0.35042637
## PubDate.day.minutes.poly.4 0.06610094
## PubDate.day.minutes.poly.4.ctg 0.45727441
## PubDate.day.minutes.poly.5.ctg 0.41093522
## WordCount.log1p 9.14088311
## WordCount.root2 96.58157174
## min.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg -0.707011442
## PubDate.day.minutes.poly.2 -0.008758717
## PubDate.day.minutes.poly.2.ctg -0.201374531
## PubDate.day.minutes.poly.4 -0.018326850
## PubDate.day.minutes.poly.4.ctg -0.512960949
## PubDate.day.minutes.poly.5.ctg -0.457118863
## WordCount.log1p 0.000000000
## WordCount.root2 0.000000000
## min.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg -0.284222080
## PubDate.day.minutes.poly.2 -0.008758791
## PubDate.day.minutes.poly.2.ctg -0.201374531
## PubDate.day.minutes.poly.4 -0.018240126
## PubDate.day.minutes.poly.4.ctg -0.401642196
## PubDate.day.minutes.poly.5.ctg -0.543132262
## WordCount.log1p 0.000000000
## WordCount.root2 0.000000000
## max.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg 0.22609552
## PubDate.day.minutes.poly.2 0.04268445
## PubDate.day.minutes.poly.2.ctg 0.60416557
## PubDate.day.minutes.poly.4 0.05893666
## PubDate.day.minutes.poly.4.ctg 0.63819571
## PubDate.day.minutes.poly.5.ctg 0.45824974
## WordCount.log1p 7.94093976
## WordCount.root2 53.00000000
## max.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg 0.19641586
## PubDate.day.minutes.poly.2 0.04254377
## PubDate.day.minutes.poly.2.ctg 0.33756882
## PubDate.day.minutes.poly.4 0.06677441
## PubDate.day.minutes.poly.4.ctg 0.38235412
## PubDate.day.minutes.poly.5.ctg 0.42244492
## WordCount.log1p 8.69232228
## WordCount.root2 77.17512553
## min.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg -0.254654849
## PubDate.day.minutes.poly.2 -0.008758791
## PubDate.day.minutes.poly.2.ctg -0.191639985
## PubDate.day.minutes.poly.4 -0.018322678
## PubDate.day.minutes.poly.4.ctg -0.239606422
## PubDate.day.minutes.poly.5.ctg -0.354757272
## WordCount.log1p 0.000000000
## WordCount.root2 0.000000000
## min.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg -0.253130167
## PubDate.day.minutes.poly.2 -0.008758672
## PubDate.day.minutes.poly.2.ctg -0.189941446
## PubDate.day.minutes.poly.4 -0.018203392
## PubDate.day.minutes.poly.4.ctg -0.205244731
## PubDate.day.minutes.poly.5.ctg -0.280963223
## WordCount.log1p 1.609437912
## WordCount.root2 2.000000000
## [1] "OOBobs Popular.fctr.All.X..rcv.glmnet Y: max > max of Train range: 12"
## UniqueID Popular.fctr.All.X..rcv.glmnet PubDate.day.minutes.poly.1
## 1922 1922 Y -0.002039521
## 1627 1627 Y 0.012231324
## 1906 1906 Y 0.002281677
## 302 302 Y 0.024722851
## 3770 3770 Y 0.001918551
## 4466 4466 Y 0.005586121
## 6435 6435 Y -0.013151170
## 1767 1767 Y 0.006784437
## 1928 1928 Y -0.003709899
## 6517 6517 Y 0.020474279
## 3205 3205 Y 0.002898990
## 6521 6521 Y 0.013901702
## PubDate.day.minutes.poly.3 PubDate.day.minutes.poly.3.ctg
## 1922 0.001236262 -0.15016859
## 1627 -0.007282868 -0.13040699
## 1906 -0.005100600 0.03046261
## 302 0.051829879 0.01497441
## 3770 -0.004619890 0.43176828
## 4466 -0.008578107 0.03634546
## 6435 0.010843928 0.01771752
## 1767 -0.009309113 0.02489965
## 1928 0.003766018 -0.12101585
## 6517 0.020825101 -0.01019734
## 3205 -0.005881110 0.01996895
## 6521 -0.004412350 -0.01394151
## PubDate.day.minutes.poly.4 PubDate.day.minutes.poly.4.ctg
## 1922 0.0096415269 -0.09688160
## 1627 -0.0128746495 -0.11643110
## 1906 0.0065638297 -0.30249370
## 302 0.0661009370 0.01396653
## 3770 0.0070741317 0.45727441
## 4466 0.0003590042 0.11223265
## 6435 -0.0101400077 0.02178124
## 1767 -0.0023405093 0.33305455
## 1928 0.0089316619 -0.23544576
## 6517 0.0101361259 0.02716643
## 3205 0.0056054933 0.01405419
## 6521 -0.0138030620 -0.03721490
## PubDate.day.minutes.poly.5 PubDate.day.minutes.poly.5.ctg
## 1922 -0.002098247 0.04932191
## 1627 -0.006115494 0.30341253
## 1906 0.007446867 0.41093522
## 302 0.083442278 -0.02109223
## 3770 0.006848725 0.38140927
## 4466 0.009641107 -0.01748172
## 6435 -0.001891593 -0.02033356
## 1767 0.008727735 -0.13083307
## 1928 -0.006056430 -0.21554955
## 6517 -0.004924177 0.01414065
## 3205 0.008323368 -0.02586218
## 6521 -0.012191759 -0.02427900
## PubDate.last16.log1p.ctg PubDate.last2.log1p.ctg PubDate.last32.log1p
## 1922 15.75212 13.934985 10.010547
## 1627 0.00000 0.000000 9.504129
## 1906 0.00000 12.444822 9.729253
## 302 11.12598 7.814400 10.332897
## 3770 13.37261 10.528838 9.938710
## 4466 13.93166 13.659964 9.313619
## 6435 12.76144 10.337832 10.866967
## 1767 13.86545 12.657569 9.415401
## 1928 15.61660 13.669639 10.027959
## 6517 12.13813 11.371500 12.217912
## 3205 12.37754 8.836665 12.005436
## 6521 12.56239 9.680219 12.196224
## PubDate.last8.log1p WordCount.nexp
## 1922 8.276395 0.000000e+00
## 1627 8.393216 0.000000e+00
## 1906 8.075272 0.000000e+00
## 302 9.741557 0.000000e+00
## 3770 8.392537 3.418239e-166
## 4466 7.827640 2.371872e-102
## 6435 8.695172 1.000000e+00
## 1767 7.891331 0.000000e+00
## 1928 8.536800 0.000000e+00
## 6517 9.753188 2.750325e-314
## 3205 11.443361 1.026188e-10
## 6521 10.414633 0.000000e+00
## id cor.y
## PubDate.day.minutes.poly.1 PubDate.day.minutes.poly.1 0.156753478
## PubDate.day.minutes.poly.3 PubDate.day.minutes.poly.3 0.027983551
## PubDate.day.minutes.poly.3.ctg PubDate.day.minutes.poly.3.ctg 0.014982807
## PubDate.day.minutes.poly.4 PubDate.day.minutes.poly.4 0.073941394
## PubDate.day.minutes.poly.4.ctg PubDate.day.minutes.poly.4.ctg 0.014601521
## PubDate.day.minutes.poly.5 PubDate.day.minutes.poly.5 -0.055929231
## PubDate.day.minutes.poly.5.ctg PubDate.day.minutes.poly.5.ctg 0.014574775
## PubDate.last16.log1p.ctg PubDate.last16.log1p.ctg 0.007783530
## PubDate.last2.log1p.ctg PubDate.last2.log1p.ctg 0.006916600
## PubDate.last32.log1p PubDate.last32.log1p 0.003558081
## PubDate.last8.log1p PubDate.last8.log1p 0.054458821
## WordCount.nexp WordCount.nexp -0.053208396
## exclude.as.feat cor.y.abs
## PubDate.day.minutes.poly.1 FALSE 0.156753478
## PubDate.day.minutes.poly.3 FALSE 0.027983551
## PubDate.day.minutes.poly.3.ctg FALSE 0.014982807
## PubDate.day.minutes.poly.4 FALSE 0.073941394
## PubDate.day.minutes.poly.4.ctg FALSE 0.014601521
## PubDate.day.minutes.poly.5 FALSE 0.055929231
## PubDate.day.minutes.poly.5.ctg FALSE 0.014574775
## PubDate.last16.log1p.ctg FALSE 0.007783530
## PubDate.last2.log1p.ctg FALSE 0.006916600
## PubDate.last32.log1p FALSE 0.003558081
## PubDate.last8.log1p FALSE 0.054458821
## WordCount.nexp FALSE 0.053208396
## cor.high.X freqRatio percentUnique
## PubDate.day.minutes.poly.1 <NA> 1.225490 18.08022
## PubDate.day.minutes.poly.3 <NA> 1.225490 18.08022
## PubDate.day.minutes.poly.3.ctg <NA> 1.083333 53.96509
## PubDate.day.minutes.poly.4 <NA> 1.225490 18.08022
## PubDate.day.minutes.poly.4.ctg <NA> 1.083333 53.94979
## PubDate.day.minutes.poly.5 <NA> 1.225490 18.08022
## PubDate.day.minutes.poly.5.ctg <NA> 1.083333 53.94979
## PubDate.last16.log1p.ctg <NA> 60.000000 95.17759
## PubDate.last2.log1p.ctg <NA> 5.000000 92.19228
## PubDate.last32.log1p <NA> 8.000000 90.99816
## PubDate.last8.log1p PubDate.last4.log1p 1.142857 75.12247
## WordCount.nexp <NA> 17.761364 11.32884
## zeroVar nzv is.cor.y.abs.low
## PubDate.day.minutes.poly.1 FALSE FALSE FALSE
## PubDate.day.minutes.poly.3 FALSE FALSE FALSE
## PubDate.day.minutes.poly.3.ctg FALSE FALSE FALSE
## PubDate.day.minutes.poly.4 FALSE FALSE FALSE
## PubDate.day.minutes.poly.4.ctg FALSE FALSE FALSE
## PubDate.day.minutes.poly.5 FALSE FALSE FALSE
## PubDate.day.minutes.poly.5.ctg FALSE FALSE FALSE
## PubDate.last16.log1p.ctg FALSE FALSE TRUE
## PubDate.last2.log1p.ctg FALSE FALSE TRUE
## PubDate.last32.log1p FALSE FALSE TRUE
## PubDate.last8.log1p FALSE FALSE FALSE
## WordCount.nexp FALSE FALSE FALSE
## interaction.feat shapiro.test.p.value
## PubDate.day.minutes.poly.1 <NA> 1.590362e-18
## PubDate.day.minutes.poly.3 <NA> 9.822405e-52
## PubDate.day.minutes.poly.3.ctg NDSSName.my.fctr 1.179915e-64
## PubDate.day.minutes.poly.4 <NA> 1.523136e-47
## PubDate.day.minutes.poly.4.ctg NDSSName.my.fctr 2.214419e-67
## PubDate.day.minutes.poly.5 <NA> 1.157500e-41
## PubDate.day.minutes.poly.5.ctg NDSSName.my.fctr 7.171204e-67
## PubDate.last16.log1p.ctg NDSSName.my.fctr 6.216597e-76
## PubDate.last2.log1p.ctg NDSSName.my.fctr 1.991089e-37
## PubDate.last32.log1p <NA> 2.783236e-77
## PubDate.last8.log1p <NA> 3.859176e-56
## WordCount.nexp <NA> 9.108805e-94
## rsp_var_raw id_var rsp_var max
## PubDate.day.minutes.poly.1 FALSE NA NA 0.02475916
## PubDate.day.minutes.poly.3 FALSE NA NA 0.05215301
## PubDate.day.minutes.poly.3.ctg FALSE NA NA 0.56127224
## PubDate.day.minutes.poly.4 FALSE NA NA 0.06677441
## PubDate.day.minutes.poly.4.ctg FALSE NA NA 0.67700049
## PubDate.day.minutes.poly.5 FALSE NA NA 0.08471756
## PubDate.day.minutes.poly.5.ctg FALSE NA NA 0.56286316
## PubDate.last16.log1p.ctg FALSE NA NA 15.77251197
## PubDate.last2.log1p.ctg FALSE NA NA 15.06116892
## PubDate.last32.log1p FALSE NA NA 12.32340669
## PubDate.last8.log1p FALSE NA NA 11.62246125
## WordCount.nexp FALSE NA NA 1.00000000
## min max.Popular.fctr.N
## PubDate.day.minutes.poly.1 -0.02749464 0.02468654
## PubDate.day.minutes.poly.3 -0.04512497 0.05150779
## PubDate.day.minutes.poly.3.ctg -0.66283168 0.55528441
## PubDate.day.minutes.poly.4 -0.01832740 0.06543120
## PubDate.day.minutes.poly.4.ctg -0.61188413 0.67700049
## PubDate.day.minutes.poly.5 -0.02450918 0.08217780
## PubDate.day.minutes.poly.5.ctg -0.71653445 0.56286316
## PubDate.last16.log1p.ctg 0.00000000 15.72030254
## PubDate.last2.log1p.ctg 0.00000000 14.72999406
## PubDate.last32.log1p 0.00000000 12.21244232
## PubDate.last8.log1p 0.00000000 11.43577441
## WordCount.nexp 0.00000000 1.00000000
## max.Popular.fctr.Y min.Popular.fctr.N
## PubDate.day.minutes.poly.1 0.024468663 -0.02749464
## PubDate.day.minutes.poly.3 0.049597025 -0.04512497
## PubDate.day.minutes.poly.3.ctg 0.363266956 -0.66283168
## PubDate.day.minutes.poly.4 0.061490534 -0.01832740
## PubDate.day.minutes.poly.4.ctg 0.289618754 -0.61188413
## PubDate.day.minutes.poly.5 0.074814724 -0.02450918
## PubDate.day.minutes.poly.5.ctg 0.215852412 -0.71653445
## PubDate.last16.log1p.ctg 15.629535143 0.00000000
## PubDate.last2.log1p.ctg 13.653551472 0.00000000
## PubDate.last32.log1p 12.178408497 0.00000000
## PubDate.last8.log1p 11.394288315 0.00000000
## WordCount.nexp 0.002478752 0.00000000
## min.Popular.fctr.Y
## PubDate.day.minutes.poly.1 -0.02745833
## PubDate.day.minutes.poly.3 -0.04482024
## PubDate.day.minutes.poly.3.ctg -0.38280444
## PubDate.day.minutes.poly.4 -0.01821959
## PubDate.day.minutes.poly.4.ctg -0.28243219
## PubDate.day.minutes.poly.5 -0.02362780
## PubDate.day.minutes.poly.5.ctg -0.37058648
## PubDate.last16.log1p.ctg 0.00000000
## PubDate.last2.log1p.ctg 0.00000000
## PubDate.last32.log1p 0.00000000
## PubDate.last8.log1p 0.00000000
## WordCount.nexp 0.00000000
## max.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1 0.02450498
## PubDate.day.minutes.poly.3 0.04991290
## PubDate.day.minutes.poly.3.ctg 0.41228572
## PubDate.day.minutes.poly.4 0.06213811
## PubDate.day.minutes.poly.4.ctg 0.49508199
## PubDate.day.minutes.poly.5 0.07601554
## PubDate.day.minutes.poly.5.ctg 0.48962874
## PubDate.last16.log1p.ctg 15.77251197
## PubDate.last2.log1p.ctg 14.25350675
## PubDate.last32.log1p 12.17624726
## PubDate.last8.log1p 11.62246125
## WordCount.nexp 1.00000000
## max.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1 0.02472285
## PubDate.day.minutes.poly.3 0.05182988
## PubDate.day.minutes.poly.3.ctg 0.43176828
## PubDate.day.minutes.poly.4 0.06610094
## PubDate.day.minutes.poly.4.ctg 0.45727441
## PubDate.day.minutes.poly.5 0.08344228
## PubDate.day.minutes.poly.5.ctg 0.41093522
## PubDate.last16.log1p.ctg 15.75212132
## PubDate.last2.log1p.ctg 13.93498461
## PubDate.last32.log1p 12.21791228
## PubDate.last8.log1p 11.44336100
## WordCount.nexp 1.00000000
## min.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1 -0.02749464
## PubDate.day.minutes.poly.3 -0.04512497
## PubDate.day.minutes.poly.3.ctg -0.39431764
## PubDate.day.minutes.poly.4 -0.01832685
## PubDate.day.minutes.poly.4.ctg -0.51296095
## PubDate.day.minutes.poly.5 -0.02450918
## PubDate.day.minutes.poly.5.ctg -0.45711886
## PubDate.last16.log1p.ctg 0.00000000
## PubDate.last2.log1p.ctg 0.00000000
## PubDate.last32.log1p 0.00000000
## PubDate.last8.log1p 0.00000000
## WordCount.nexp 0.00000000
## min.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1 -0.02745833
## PubDate.day.minutes.poly.3 -0.04482024
## PubDate.day.minutes.poly.3.ctg -0.17571340
## PubDate.day.minutes.poly.4 -0.01824013
## PubDate.day.minutes.poly.4.ctg -0.40164220
## PubDate.day.minutes.poly.5 -0.02362780
## PubDate.day.minutes.poly.5.ctg -0.54313226
## PubDate.last16.log1p.ctg 0.00000000
## PubDate.last2.log1p.ctg 0.00000000
## PubDate.last32.log1p 0.00000000
## PubDate.last8.log1p 7.12447826
## WordCount.nexp 0.00000000
## max.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1 0.02432341
## PubDate.day.minutes.poly.3 0.04834381
## PubDate.day.minutes.poly.3.ctg 0.56127224
## PubDate.day.minutes.poly.4 0.05893666
## PubDate.day.minutes.poly.4.ctg 0.63819571
## PubDate.day.minutes.poly.5 0.07011504
## PubDate.day.minutes.poly.5.ctg 0.45824974
## PubDate.last16.log1p.ctg 15.68420514
## PubDate.last2.log1p.ctg 15.06116892
## PubDate.last32.log1p 12.32340669
## PubDate.last8.log1p 11.27955479
## WordCount.nexp 1.00000000
## max.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1 0.02475916
## PubDate.day.minutes.poly.3 0.05215301
## PubDate.day.minutes.poly.3.ctg 0.34217266
## PubDate.day.minutes.poly.4 0.06677441
## PubDate.day.minutes.poly.4.ctg 0.38235412
## PubDate.day.minutes.poly.5 0.08471756
## PubDate.day.minutes.poly.5.ctg 0.42244492
## PubDate.last16.log1p.ctg 15.67548989
## PubDate.last2.log1p.ctg 14.77515997
## PubDate.last32.log1p 12.30973422
## PubDate.last8.log1p 11.33227851
## WordCount.nexp 0.01831564
## min.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1 -0.02749464
## PubDate.day.minutes.poly.3 -0.04512497
## PubDate.day.minutes.poly.3.ctg -0.65503648
## PubDate.day.minutes.poly.4 -0.01832268
## PubDate.day.minutes.poly.4.ctg -0.23960642
## PubDate.day.minutes.poly.5 -0.02450918
## PubDate.day.minutes.poly.5.ctg -0.35475727
## PubDate.last16.log1p.ctg 0.00000000
## PubDate.last2.log1p.ctg 0.00000000
## PubDate.last32.log1p 8.86290830
## PubDate.last8.log1p 7.06133437
## WordCount.nexp 0.00000000
## min.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1 -0.02745833
## PubDate.day.minutes.poly.3 -0.04482024
## PubDate.day.minutes.poly.3.ctg -0.33854383
## PubDate.day.minutes.poly.4 -0.01820339
## PubDate.day.minutes.poly.4.ctg -0.20524473
## PubDate.day.minutes.poly.5 -0.02362780
## PubDate.day.minutes.poly.5.ctg -0.28096322
## PubDate.last16.log1p.ctg 0.00000000
## PubDate.last2.log1p.ctg 0.00000000
## PubDate.last32.log1p 8.83579237
## PubDate.last8.log1p 6.89162590
## WordCount.nexp 0.00000000
## [1] "OOBobs Popular.fctr.All.X..rcv.glmnet N: min < min of Train range: 1"
## UniqueID Popular.fctr.All.X..rcv.glmnet
## 1833 1833 N
## PubDate.day.minutes.poly.1.ctg
## 1833 -0.7070114
## id cor.y
## PubDate.day.minutes.poly.1.ctg PubDate.day.minutes.poly.1.ctg -0.002432289
## exclude.as.feat cor.y.abs cor.high.X
## PubDate.day.minutes.poly.1.ctg FALSE 0.002432289 <NA>
## freqRatio percentUnique zeroVar nzv
## PubDate.day.minutes.poly.1.ctg 1.083333 53.96509 FALSE FALSE
## is.cor.y.abs.low interaction.feat
## PubDate.day.minutes.poly.1.ctg TRUE NDSSName.my.fctr
## shapiro.test.p.value rsp_var_raw id_var
## PubDate.day.minutes.poly.1.ctg 1.051535e-45 FALSE NA
## rsp_var max min
## PubDate.day.minutes.poly.1.ctg NA 0.4812771 -0.7070114
## max.Popular.fctr.N max.Popular.fctr.Y
## PubDate.day.minutes.poly.1.ctg 0.3596891 0.3295025
## min.Popular.fctr.N min.Popular.fctr.Y
## PubDate.day.minutes.poly.1.ctg -0.3997769 -0.2457038
## max.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg 0.4812771
## max.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg 0.1898852
## min.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg -0.7070114
## min.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg -0.2842221
## max.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg 0.2260955
## max.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg 0.1964159
## min.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg -0.2546548
## min.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg -0.2531302
## [1] "OOBobs Popular.fctr.All.X..rcv.glmnet N: max > max of Train range: 5"
## UniqueID Popular.fctr.All.X..rcv.glmnet
## 4402 4402 N
## 5233 5233 N
## 6528 6528 N
## 1924 1924 N
## 1923 1923 N
## PubDate.day.minutes.poly.1.ctg PubDate.last16.log1p
## 4402 0.48127714 9.200997
## 5233 -0.01681076 11.762734
## 6528 0.02086374 11.956983
## 1924 0.04318327 9.157361
## 1923 -0.06338396 9.173261
## PubDate.last16.log1p.ctg PubDate.last8.log1p
## 4402 0.00000 8.432724
## 5233 11.82663 11.622461
## 6528 12.49349 11.425547
## 1924 15.72590 8.333751
## 1923 15.77251 8.363109
## id cor.y
## PubDate.day.minutes.poly.1.ctg PubDate.day.minutes.poly.1.ctg -0.002432289
## PubDate.last16.log1p PubDate.last16.log1p 0.040735543
## PubDate.last16.log1p.ctg PubDate.last16.log1p.ctg 0.007783530
## PubDate.last8.log1p PubDate.last8.log1p 0.054458821
## exclude.as.feat cor.y.abs
## PubDate.day.minutes.poly.1.ctg FALSE 0.002432289
## PubDate.last16.log1p FALSE 0.040735543
## PubDate.last16.log1p.ctg FALSE 0.007783530
## PubDate.last8.log1p FALSE 0.054458821
## cor.high.X freqRatio percentUnique
## PubDate.day.minutes.poly.1.ctg <NA> 1.083333 53.96509
## PubDate.last16.log1p <NA> 3.200000 84.44581
## PubDate.last16.log1p.ctg <NA> 60.000000 95.17759
## PubDate.last8.log1p PubDate.last4.log1p 1.142857 75.12247
## zeroVar nzv is.cor.y.abs.low
## PubDate.day.minutes.poly.1.ctg FALSE FALSE TRUE
## PubDate.last16.log1p FALSE FALSE FALSE
## PubDate.last16.log1p.ctg FALSE FALSE TRUE
## PubDate.last8.log1p FALSE FALSE FALSE
## interaction.feat shapiro.test.p.value
## PubDate.day.minutes.poly.1.ctg NDSSName.my.fctr 1.051535e-45
## PubDate.last16.log1p <NA> 7.310334e-68
## PubDate.last16.log1p.ctg NDSSName.my.fctr 6.216597e-76
## PubDate.last8.log1p <NA> 3.859176e-56
## rsp_var_raw id_var rsp_var max
## PubDate.day.minutes.poly.1.ctg FALSE NA NA 0.4812771
## PubDate.last16.log1p FALSE NA NA 11.9569829
## PubDate.last16.log1p.ctg FALSE NA NA 15.7725120
## PubDate.last8.log1p FALSE NA NA 11.6224613
## min max.Popular.fctr.N
## PubDate.day.minutes.poly.1.ctg -0.7070114 0.3596891
## PubDate.last16.log1p 0.0000000 11.9453181
## PubDate.last16.log1p.ctg 0.0000000 15.7203025
## PubDate.last8.log1p 0.0000000 11.4357744
## max.Popular.fctr.Y min.Popular.fctr.N
## PubDate.day.minutes.poly.1.ctg 0.3295025 -0.3997769
## PubDate.last16.log1p 11.8776033 0.0000000
## PubDate.last16.log1p.ctg 15.6295351 0.0000000
## PubDate.last8.log1p 11.3942883 0.0000000
## min.Popular.fctr.Y
## PubDate.day.minutes.poly.1.ctg -0.2457038
## PubDate.last16.log1p 0.0000000
## PubDate.last16.log1p.ctg 0.0000000
## PubDate.last8.log1p 0.0000000
## max.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg 0.4812771
## PubDate.last16.log1p 11.9569829
## PubDate.last16.log1p.ctg 15.7725120
## PubDate.last8.log1p 11.6224613
## max.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg 0.1898852
## PubDate.last16.log1p 11.8683176
## PubDate.last16.log1p.ctg 15.7521213
## PubDate.last8.log1p 11.4433610
## min.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg -0.7070114
## PubDate.last16.log1p 0.0000000
## PubDate.last16.log1p.ctg 0.0000000
## PubDate.last8.log1p 0.0000000
## min.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg -0.2842221
## PubDate.last16.log1p 0.0000000
## PubDate.last16.log1p.ctg 0.0000000
## PubDate.last8.log1p 7.1244783
## max.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg 0.2260955
## PubDate.last16.log1p 11.8518091
## PubDate.last16.log1p.ctg 15.6842051
## PubDate.last8.log1p 11.2795548
## max.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg 0.1964159
## PubDate.last16.log1p 11.8811317
## PubDate.last16.log1p.ctg 15.6754899
## PubDate.last8.log1p 11.3322785
## min.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg -0.2546548
## PubDate.last16.log1p 8.1016777
## PubDate.last16.log1p.ctg 0.0000000
## PubDate.last8.log1p 7.0613344
## min.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg -0.2531302
## PubDate.last16.log1p 8.0922394
## PubDate.last16.log1p.ctg 0.0000000
## PubDate.last8.log1p 6.8916259
## [1] "OOBobs total range outliers: 22"
## [1] "newobs Popular.fctr.Final..rcv.glmnet N: max > max of Train range: 1181"
## UniqueID Popular.fctr.Final..rcv.glmnet
## 6533 6533 N
## 6540 6540 N
## 6541 6541 N
## 6542 6542 N
## 6543 6543 N
## 6545 6545 N
## PubDate.day.minutes.poly.3.ctg PubDate.juliandate
## 6533 0.01381696 335
## 6540 0.02736795 335
## 6541 0.03472372 335
## 6542 0.03704916 335
## 6543 0.02491822 335
## 6545 -0.03157315 335
## PubDate.last2.log1p.ctg PubDate.last32.log1p PubDate.last32.log1p.ctg
## 6533 11.020840 10.134321 13.09830
## 6540 8.212026 9.418817 13.12125
## 6541 10.856592 9.442800 13.12686
## 6542 11.034890 9.391411 13.12590
## 6543 9.618070 9.400878 12.81628
## 6545 9.879502 9.402860 12.99054
## PubDate.last4.log1p.ctg PubDate.last8.log1p.ctg
## 6533 11.253274 11.89583
## 6540 11.092611 11.26715
## 6541 11.076186 11.28744
## 6542 11.184338 11.82491
## 6543 10.450423 11.38482
## 6545 9.953134 11.38843
## UniqueID Popular.fctr.Final..rcv.glmnet
## 6630 6630 N
## 7488 7488 N
## 7580 7580 N
## 7721 7721 N
## 7912 7912 N
## 8239 8239 N
## PubDate.day.minutes.poly.3.ctg PubDate.juliandate
## 6630 0.001963364 335
## 7488 -0.027083212 346
## 7580 -0.027089537 349
## 7721 -0.027089537 351
## 7912 0.030002733 352
## 8239 -0.044611364 360
## PubDate.last2.log1p.ctg PubDate.last32.log1p PubDate.last32.log1p.ctg
## 6630 7.465655 12.232807 13.04792
## 7488 9.068662 10.812149 12.04403
## 7580 8.407825 10.486904 12.06919
## 7721 9.203920 9.283126 12.02296
## 7912 9.104869 10.726434 13.13566
## 8239 11.373905 11.841769 14.54845
## PubDate.last4.log1p.ctg PubDate.last8.log1p.ctg
## 6630 11.527913 12.120275
## 7488 9.398395 10.316954
## 7580 9.440340 10.236131
## 7721 9.512073 9.938565
## 7912 11.105694 11.427139
## 8239 12.468833 13.312660
## UniqueID Popular.fctr.Final..rcv.glmnet
## 8393 8393 N
## 8394 8394 N
## 8396 8396 N
## 8398 8398 N
## 8400 8400 N
## 8401 8401 N
## PubDate.day.minutes.poly.3.ctg PubDate.juliandate
## 8393 0.04098057 365
## 8394 -0.27725830 365
## 8396 0.15876582 365
## 8398 -0.14312698 365
## 8400 -0.13809968 365
## 8401 0.19264490 365
## PubDate.last2.log1p.ctg PubDate.last32.log1p PubDate.last32.log1p.ctg
## 8393 11.49139 11.16056 15.12820
## 8394 13.13739 11.14224 15.14526
## 8396 13.98310 11.11651 15.39121
## 8398 6.43294 11.09410 15.90984
## 8400 15.06117 11.07855 15.92029
## 8401 11.51832 11.05398 15.91539
## PubDate.last4.log1p.ctg PubDate.last8.log1p.ctg
## 8393 13.35302 13.96491
## 8394 13.41328 13.96451
## 8396 14.06262 14.57832
## 8398 15.06135 15.37495
## 8400 15.35403 15.48083
## 8401 14.81295 15.38483
## id cor.y
## PubDate.day.minutes.poly.3.ctg PubDate.day.minutes.poly.3.ctg 0.014982807
## PubDate.juliandate PubDate.juliandate 0.014361075
## PubDate.last2.log1p.ctg PubDate.last2.log1p.ctg 0.006916600
## PubDate.last32.log1p PubDate.last32.log1p 0.003558081
## PubDate.last32.log1p.ctg PubDate.last32.log1p.ctg 0.015395971
## PubDate.last4.log1p.ctg PubDate.last4.log1p.ctg 0.004792781
## PubDate.last8.log1p.ctg PubDate.last8.log1p.ctg 0.003914960
## exclude.as.feat cor.y.abs
## PubDate.day.minutes.poly.3.ctg FALSE 0.014982807
## PubDate.juliandate FALSE 0.014361075
## PubDate.last2.log1p.ctg FALSE 0.006916600
## PubDate.last32.log1p FALSE 0.003558081
## PubDate.last32.log1p.ctg FALSE 0.015395971
## PubDate.last4.log1p.ctg FALSE 0.004792781
## PubDate.last8.log1p.ctg FALSE 0.003914960
## cor.high.X freqRatio percentUnique
## PubDate.day.minutes.poly.3.ctg <NA> 1.083333 53.965095
## PubDate.juliandate PubDate.month.fctr 1.032520 1.393141
## PubDate.last2.log1p.ctg <NA> 5.000000 92.192284
## PubDate.last32.log1p <NA> 8.000000 90.998163
## PubDate.last32.log1p.ctg <NA> 239.000000 92.115738
## PubDate.last4.log1p.ctg <NA> 20.000000 95.881813
## PubDate.last8.log1p.ctg <NA> 40.000000 96.417636
## zeroVar nzv is.cor.y.abs.low
## PubDate.day.minutes.poly.3.ctg FALSE FALSE FALSE
## PubDate.juliandate FALSE FALSE FALSE
## PubDate.last2.log1p.ctg FALSE FALSE TRUE
## PubDate.last32.log1p FALSE FALSE TRUE
## PubDate.last32.log1p.ctg FALSE FALSE FALSE
## PubDate.last4.log1p.ctg FALSE FALSE TRUE
## PubDate.last8.log1p.ctg FALSE FALSE TRUE
## interaction.feat shapiro.test.p.value
## PubDate.day.minutes.poly.3.ctg NDSSName.my.fctr 1.179915e-64
## PubDate.juliandate <NA> 1.389406e-35
## PubDate.last2.log1p.ctg NDSSName.my.fctr 1.991089e-37
## PubDate.last32.log1p <NA> 2.783236e-77
## PubDate.last32.log1p.ctg NDSSName.my.fctr 1.647772e-78
## PubDate.last4.log1p.ctg NDSSName.my.fctr 5.833827e-54
## PubDate.last8.log1p.ctg NDSSName.my.fctr 2.241558e-67
## rsp_var_raw id_var rsp_var max
## PubDate.day.minutes.poly.3.ctg FALSE NA NA 0.5612722
## PubDate.juliandate FALSE NA NA 365.0000000
## PubDate.last2.log1p.ctg FALSE NA NA 15.0611689
## PubDate.last32.log1p FALSE NA NA 12.3234067
## PubDate.last32.log1p.ctg FALSE NA NA 15.9202866
## PubDate.last4.log1p.ctg FALSE NA NA 15.3540272
## PubDate.last8.log1p.ctg FALSE NA NA 15.4808349
## min max.Popular.fctr.N
## PubDate.day.minutes.poly.3.ctg -0.6628317 0.5552844
## PubDate.juliandate 244.0000000 334.0000000
## PubDate.last2.log1p.ctg 0.0000000 14.7299941
## PubDate.last32.log1p 0.0000000 12.2124423
## PubDate.last32.log1p.ctg 0.0000000 15.3192168
## PubDate.last4.log1p.ctg 0.0000000 14.6582245
## PubDate.last8.log1p.ctg 0.0000000 15.1997598
## max.Popular.fctr.Y min.Popular.fctr.N
## PubDate.day.minutes.poly.3.ctg 0.363267 -0.6628317
## PubDate.juliandate 334.000000 244.0000000
## PubDate.last2.log1p.ctg 14.253507 0.0000000
## PubDate.last32.log1p 12.217912 0.0000000
## PubDate.last32.log1p.ctg 15.300332 0.0000000
## PubDate.last4.log1p.ctg 14.733865 0.0000000
## PubDate.last8.log1p.ctg 15.122845 0.0000000
## min.Popular.fctr.Y
## PubDate.day.minutes.poly.3.ctg -0.3828044
## PubDate.juliandate 244.0000000
## PubDate.last2.log1p.ctg 0.0000000
## PubDate.last32.log1p 0.0000000
## PubDate.last32.log1p.ctg 0.0000000
## PubDate.last4.log1p.ctg 0.0000000
## PubDate.last8.log1p.ctg 0.0000000
## max.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.3.ctg 0.4122857
## PubDate.juliandate 334.0000000
## PubDate.last2.log1p.ctg 14.2535067
## PubDate.last32.log1p 12.1762473
## PubDate.last32.log1p.ctg 15.2783647
## PubDate.last4.log1p.ctg 14.5891468
## PubDate.last8.log1p.ctg 15.1063837
## max.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.3.ctg 0.4317683
## PubDate.juliandate 334.0000000
## PubDate.last2.log1p.ctg 13.9349846
## PubDate.last32.log1p 12.2179123
## PubDate.last32.log1p.ctg 15.1621929
## PubDate.last4.log1p.ctg 14.3137888
## PubDate.last8.log1p.ctg 14.8952302
## min.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.3.ctg -0.3943176
## PubDate.juliandate 244.0000000
## PubDate.last2.log1p.ctg 0.0000000
## PubDate.last32.log1p 0.0000000
## PubDate.last32.log1p.ctg 0.0000000
## PubDate.last4.log1p.ctg 0.0000000
## PubDate.last8.log1p.ctg 0.0000000
## min.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.3.ctg -0.1757134
## PubDate.juliandate 244.0000000
## PubDate.last2.log1p.ctg 0.0000000
## PubDate.last32.log1p 0.0000000
## PubDate.last32.log1p.ctg 0.0000000
## PubDate.last4.log1p.ctg 0.0000000
## PubDate.last8.log1p.ctg 0.0000000
## max.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.3.ctg 0.5612722
## PubDate.juliandate 365.0000000
## PubDate.last2.log1p.ctg 15.0611689
## PubDate.last32.log1p 12.3234067
## PubDate.last32.log1p.ctg 15.9202866
## PubDate.last4.log1p.ctg 15.3540272
## PubDate.last8.log1p.ctg 15.4808349
## max.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.3.ctg 0.3421727
## PubDate.juliandate 365.0000000
## PubDate.last2.log1p.ctg 14.7751600
## PubDate.last32.log1p 12.3097342
## PubDate.last32.log1p.ctg 15.9193719
## PubDate.last4.log1p.ctg 15.3435742
## PubDate.last8.log1p.ctg 15.4661689
## min.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.3.ctg -0.6550365
## PubDate.juliandate 335.0000000
## PubDate.last2.log1p.ctg 0.0000000
## PubDate.last32.log1p 8.8629083
## PubDate.last32.log1p.ctg 0.0000000
## PubDate.last4.log1p.ctg 0.0000000
## PubDate.last8.log1p.ctg 0.0000000
## min.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.3.ctg -0.3385438
## PubDate.juliandate 335.0000000
## PubDate.last2.log1p.ctg 0.0000000
## PubDate.last32.log1p 8.8357924
## PubDate.last32.log1p.ctg 0.0000000
## PubDate.last4.log1p.ctg 0.0000000
## PubDate.last8.log1p.ctg 0.0000000
## [1] "newobs Popular.fctr.Final..rcv.glmnet Y: min < min of Train range: 3"
## UniqueID Popular.fctr.Final..rcv.glmnet
## 8217 8217 Y
## 8360 8360 Y
## 8375 8375 Y
## PubDate.day.minutes.poly.1.ctg WordCount.log1p WordCount.root2
## 8217 0.01104094 1.609438 2.00000
## 8360 -0.25313017 7.202661 36.63332
## 8375 -0.25313017 6.274762 23.02173
## id cor.y
## PubDate.day.minutes.poly.1.ctg PubDate.day.minutes.poly.1.ctg -0.002432289
## WordCount.log1p WordCount.log1p 0.254319628
## WordCount.root2 WordCount.root2 0.292120679
## exclude.as.feat cor.y.abs cor.high.X
## PubDate.day.minutes.poly.1.ctg FALSE 0.002432289 <NA>
## WordCount.log1p FALSE 0.254319628 WordCount.root2
## WordCount.root2 FALSE 0.292120679 <NA>
## freqRatio percentUnique zeroVar nzv
## PubDate.day.minutes.poly.1.ctg 1.083333 53.96509 FALSE FALSE
## WordCount.log1p 2.315789 24.15799 FALSE FALSE
## WordCount.root2 2.315789 24.15799 FALSE FALSE
## is.cor.y.abs.low interaction.feat
## PubDate.day.minutes.poly.1.ctg TRUE NDSSName.my.fctr
## WordCount.log1p FALSE <NA>
## WordCount.root2 FALSE <NA>
## shapiro.test.p.value rsp_var_raw id_var
## PubDate.day.minutes.poly.1.ctg 1.051535e-45 FALSE NA
## WordCount.log1p 1.576866e-49 FALSE NA
## WordCount.root2 4.556481e-30 FALSE NA
## rsp_var max min
## PubDate.day.minutes.poly.1.ctg NA 0.4812771 -0.7070114
## WordCount.log1p NA 9.2977100 0.0000000
## WordCount.root2 NA 104.4605189 0.0000000
## max.Popular.fctr.N max.Popular.fctr.Y
## PubDate.day.minutes.poly.1.ctg 0.4812771 0.3295025
## WordCount.log1p 8.8196653 9.2977100
## WordCount.root2 82.2496201 104.4605189
## min.Popular.fctr.N min.Popular.fctr.Y
## PubDate.day.minutes.poly.1.ctg -0.7070114 -0.2457038
## WordCount.log1p 0.0000000 1.9459101
## WordCount.root2 0.0000000 2.4494897
## max.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg 0.4812771
## WordCount.log1p 7.6511202
## WordCount.root2 45.8475735
## max.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg 0.1898852
## WordCount.log1p 9.1408831
## WordCount.root2 96.5815717
## min.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg -0.7070114
## WordCount.log1p 0.0000000
## WordCount.root2 0.0000000
## min.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg -0.2842221
## WordCount.log1p 0.0000000
## WordCount.root2 0.0000000
## max.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg 0.2260955
## WordCount.log1p 7.9409398
## WordCount.root2 53.0000000
## max.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg 0.1964159
## WordCount.log1p 8.6923223
## WordCount.root2 77.1751255
## min.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1.ctg -0.2546548
## WordCount.log1p 0.0000000
## WordCount.root2 0.0000000
## min.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1.ctg -0.2531302
## WordCount.log1p 1.6094379
## WordCount.root2 2.0000000
## [1] "newobs Popular.fctr.Final..rcv.glmnet Y: max > max of Train range: 689"
## UniqueID Popular.fctr.Final..rcv.glmnet PubDate.day.minutes.poly.1
## 6534 6534 Y 0.02047428
## 6535 6535 Y 0.02043797
## 6536 6536 Y 0.01840446
## 6537 6537 Y 0.01437377
## 6538 6538 Y 0.01408327
## 6539 6539 Y 0.01390170
## PubDate.day.minutes.poly.3 PubDate.day.minutes.poly.4
## 6534 0.020825101 0.010136126
## 6535 0.020614888 0.009828175
## 6536 0.010192448 -0.003724687
## 6537 -0.003371562 -0.013710045
## 6538 -0.004024505 -0.013788116
## 6539 -0.004412350 -0.013803062
## PubDate.day.minutes.poly.4.ctg PubDate.day.minutes.poly.5
## 6534 -0.026117829 -0.004924177
## 6535 -0.032426274 -0.005269034
## 6536 0.002579915 -0.017201046
## 6537 0.024407410 -0.013755146
## 6538 0.024419023 -0.012806242
## 6539 0.010731259 -0.012191759
## PubDate.day.minutes.poly.5.ctg PubDate.juliandate
## 6534 -0.04109512 335
## 6535 -0.03723150 335
## 6536 0.02875121 335
## 6537 0.01925208 335
## 6538 -0.01676509 335
## 6539 -0.02825414 335
## PubDate.last16.log1p PubDate.last2.log1p.ctg PubDate.last32.log1p
## 6534 9.732284 9.639001 10.036094
## 6535 9.796793 9.896664 10.053458
## 6536 9.590761 9.540219 9.939434
## 6537 9.056023 9.362546 9.564863
## 6538 8.998384 8.234830 9.542733
## 6539 8.966356 7.949797 9.572898
## PubDate.last32.log1p.ctg PubDate.last4.log1p.ctg
## 6534 13.03463 10.283942
## 6535 13.04093 10.357965
## 6536 13.01641 9.780020
## 6537 13.10988 9.580386
## 6538 13.09908 8.916506
## 6539 13.11828 10.909784
## PubDate.last8.log1p.ctg WordCount.nexp
## 6534 10.51086 4.609768e-243
## 6535 11.36460 0.000000e+00
## 6536 11.33092 0.000000e+00
## 6537 11.27835 3.128062e-93
## 6538 11.28285 0.000000e+00
## 6539 11.26846 0.000000e+00
## UniqueID Popular.fctr.Final..rcv.glmnet PubDate.day.minutes.poly.1
## 6671 6671 Y 0.002463239
## 6874 6874 Y -0.005198715
## 7313 7313 Y 0.017424023
## 7521 7521 Y -0.012243356
## 7727 7727 Y 0.008309565
## 8139 8139 Y -0.006033904
## PubDate.day.minutes.poly.3 PubDate.day.minutes.poly.4
## 6671 -0.005335173 0.006293548
## 6874 0.005883311 0.007407554
## 7313 0.006069933 -0.007952932
## 7521 0.011090158 -0.007858351
## 7727 -0.009706774 -0.005828440
## 8139 0.006975718 0.006208995
## PubDate.day.minutes.poly.4.ctg PubDate.day.minutes.poly.5
## 6671 -0.0364013973 0.007723689
## 6874 0.0333011143 -0.008968677
## 7313 0.0490173758 -0.018810276
## 7521 0.0002631104 -0.005010155
## 7727 0.0192139368 0.006168476
## 8139 -0.0232492041 -0.010194738
## PubDate.day.minutes.poly.5.ctg PubDate.juliandate
## 6671 -0.014806823 336
## 6874 0.046299668 338
## 7313 0.006710828 345
## 7521 0.030907307 348
## 7727 -0.031892087 351
## 8139 0.002853354 357
## PubDate.last16.log1p PubDate.last2.log1p.ctg PubDate.last32.log1p
## 6671 9.199482 9.562616 9.941120
## 6874 9.338646 9.485849 10.910989
## 7313 9.490167 11.024986 10.097820
## 7521 11.748369 8.335671 11.852415
## 7727 8.710949 8.189800 9.239414
## 8139 9.865993 8.762333 10.983868
## PubDate.last32.log1p.ctg PubDate.last4.log1p.ctg
## 6671 13.21616 10.273429
## 6874 13.98350 11.871844
## 7313 14.13180 11.400014
## 7521 12.03362 9.276222
## 7727 11.97875 10.961399
## 8139 12.76589 9.203517
## PubDate.last8.log1p.ctg WordCount.nexp
## 6671 12.05653 0.000000e+00
## 6874 12.66872 0.000000e+00
## 7313 12.95036 7.204525e-261
## 7521 10.09778 0.000000e+00
## 7727 11.11079 0.000000e+00
## 8139 11.36641 4.344235e-104
## UniqueID Popular.fctr.Final..rcv.glmnet PubDate.day.minutes.poly.1
## 8391 8391 Y -0.007559033
## 8392 8392 Y -0.007885846
## 8395 8395 Y -0.010173539
## 8397 8397 Y -0.012134418
## 8399 8399 Y -0.014204235
## 8402 8402 Y -0.027458327
## PubDate.day.minutes.poly.3 PubDate.day.minutes.poly.4
## 8391 0.008724731 0.003454984
## 8392 0.009050092 0.002780751
## 8395 0.010715870 -0.002557148
## 8397 0.011101808 -0.007579609
## 8399 0.010203360 -0.012625915
## 8402 -0.044820236 0.033658267
## PubDate.day.minutes.poly.4.ctg PubDate.day.minutes.poly.5
## 8391 -0.129369992 -0.011437989
## 8392 0.019130981 -0.011513182
## 8395 -0.194219443 -0.009889729
## 8397 0.014839874 -0.005346258
## 8399 0.021396462 0.002370447
## 8402 0.001133258 -0.023627798
## PubDate.day.minutes.poly.5.ctg PubDate.juliandate
## 8391 -0.06697667 365
## 8392 0.15930790 365
## 8395 0.12356866 365
## 8397 -0.20117350 365
## 8399 0.16099960 365
## 8402 -0.19528592 365
## PubDate.last16.log1p PubDate.last2.log1p.ctg PubDate.last32.log1p
## 8391 10.97006 11.99075 11.15722
## 8392 10.96164 13.18411 11.15937
## 8395 10.93203 11.44674 11.14428
## 8397 10.91201 14.50789 11.11095
## 8399 10.86224 14.77516 11.10274
## 8402 10.46110 11.93860 10.79296
## PubDate.last32.log1p.ctg PubDate.last4.log1p.ctg
## 8391 0.00000 12.95591
## 8392 15.18202 13.66929
## 8395 15.14071 13.30663
## 8397 15.53790 14.55366
## 8399 15.91937 15.34357
## 8402 15.91787 11.94265
## PubDate.last8.log1p.ctg WordCount.nexp
## 8391 14.00877 0.000000e+00
## 8392 14.13371 0.000000e+00
## 8395 13.83971 9.054614e-156
## 8397 14.83106 0.000000e+00
## 8399 15.46617 0.000000e+00
## 8402 15.38649 0.000000e+00
## id cor.y
## PubDate.day.minutes.poly.1 PubDate.day.minutes.poly.1 0.156753478
## PubDate.day.minutes.poly.3 PubDate.day.minutes.poly.3 0.027983551
## PubDate.day.minutes.poly.4 PubDate.day.minutes.poly.4 0.073941394
## PubDate.day.minutes.poly.4.ctg PubDate.day.minutes.poly.4.ctg 0.014601521
## PubDate.day.minutes.poly.5 PubDate.day.minutes.poly.5 -0.055929231
## PubDate.day.minutes.poly.5.ctg PubDate.day.minutes.poly.5.ctg 0.014574775
## PubDate.juliandate PubDate.juliandate 0.014361075
## PubDate.last16.log1p PubDate.last16.log1p 0.040735543
## PubDate.last2.log1p.ctg PubDate.last2.log1p.ctg 0.006916600
## PubDate.last32.log1p PubDate.last32.log1p 0.003558081
## PubDate.last32.log1p.ctg PubDate.last32.log1p.ctg 0.015395971
## PubDate.last4.log1p.ctg PubDate.last4.log1p.ctg 0.004792781
## PubDate.last8.log1p.ctg PubDate.last8.log1p.ctg 0.003914960
## WordCount.nexp WordCount.nexp -0.053208396
## exclude.as.feat cor.y.abs
## PubDate.day.minutes.poly.1 FALSE 0.156753478
## PubDate.day.minutes.poly.3 FALSE 0.027983551
## PubDate.day.minutes.poly.4 FALSE 0.073941394
## PubDate.day.minutes.poly.4.ctg FALSE 0.014601521
## PubDate.day.minutes.poly.5 FALSE 0.055929231
## PubDate.day.minutes.poly.5.ctg FALSE 0.014574775
## PubDate.juliandate FALSE 0.014361075
## PubDate.last16.log1p FALSE 0.040735543
## PubDate.last2.log1p.ctg FALSE 0.006916600
## PubDate.last32.log1p FALSE 0.003558081
## PubDate.last32.log1p.ctg FALSE 0.015395971
## PubDate.last4.log1p.ctg FALSE 0.004792781
## PubDate.last8.log1p.ctg FALSE 0.003914960
## WordCount.nexp FALSE 0.053208396
## cor.high.X freqRatio percentUnique
## PubDate.day.minutes.poly.1 <NA> 1.225490 18.080220
## PubDate.day.minutes.poly.3 <NA> 1.225490 18.080220
## PubDate.day.minutes.poly.4 <NA> 1.225490 18.080220
## PubDate.day.minutes.poly.4.ctg <NA> 1.083333 53.949786
## PubDate.day.minutes.poly.5 <NA> 1.225490 18.080220
## PubDate.day.minutes.poly.5.ctg <NA> 1.083333 53.949786
## PubDate.juliandate PubDate.month.fctr 1.032520 1.393141
## PubDate.last16.log1p <NA> 3.200000 84.445805
## PubDate.last2.log1p.ctg <NA> 5.000000 92.192284
## PubDate.last32.log1p <NA> 8.000000 90.998163
## PubDate.last32.log1p.ctg <NA> 239.000000 92.115738
## PubDate.last4.log1p.ctg <NA> 20.000000 95.881813
## PubDate.last8.log1p.ctg <NA> 40.000000 96.417636
## WordCount.nexp <NA> 17.761364 11.328843
## zeroVar nzv is.cor.y.abs.low
## PubDate.day.minutes.poly.1 FALSE FALSE FALSE
## PubDate.day.minutes.poly.3 FALSE FALSE FALSE
## PubDate.day.minutes.poly.4 FALSE FALSE FALSE
## PubDate.day.minutes.poly.4.ctg FALSE FALSE FALSE
## PubDate.day.minutes.poly.5 FALSE FALSE FALSE
## PubDate.day.minutes.poly.5.ctg FALSE FALSE FALSE
## PubDate.juliandate FALSE FALSE FALSE
## PubDate.last16.log1p FALSE FALSE FALSE
## PubDate.last2.log1p.ctg FALSE FALSE TRUE
## PubDate.last32.log1p FALSE FALSE TRUE
## PubDate.last32.log1p.ctg FALSE FALSE FALSE
## PubDate.last4.log1p.ctg FALSE FALSE TRUE
## PubDate.last8.log1p.ctg FALSE FALSE TRUE
## WordCount.nexp FALSE FALSE FALSE
## interaction.feat shapiro.test.p.value
## PubDate.day.minutes.poly.1 <NA> 1.590362e-18
## PubDate.day.minutes.poly.3 <NA> 9.822405e-52
## PubDate.day.minutes.poly.4 <NA> 1.523136e-47
## PubDate.day.minutes.poly.4.ctg NDSSName.my.fctr 2.214419e-67
## PubDate.day.minutes.poly.5 <NA> 1.157500e-41
## PubDate.day.minutes.poly.5.ctg NDSSName.my.fctr 7.171204e-67
## PubDate.juliandate <NA> 1.389406e-35
## PubDate.last16.log1p <NA> 7.310334e-68
## PubDate.last2.log1p.ctg NDSSName.my.fctr 1.991089e-37
## PubDate.last32.log1p <NA> 2.783236e-77
## PubDate.last32.log1p.ctg NDSSName.my.fctr 1.647772e-78
## PubDate.last4.log1p.ctg NDSSName.my.fctr 5.833827e-54
## PubDate.last8.log1p.ctg NDSSName.my.fctr 2.241558e-67
## WordCount.nexp <NA> 9.108805e-94
## rsp_var_raw id_var rsp_var max
## PubDate.day.minutes.poly.1 FALSE NA NA 0.02475916
## PubDate.day.minutes.poly.3 FALSE NA NA 0.05215301
## PubDate.day.minutes.poly.4 FALSE NA NA 0.06677441
## PubDate.day.minutes.poly.4.ctg FALSE NA NA 0.67700049
## PubDate.day.minutes.poly.5 FALSE NA NA 0.08471756
## PubDate.day.minutes.poly.5.ctg FALSE NA NA 0.56286316
## PubDate.juliandate FALSE NA NA 365.00000000
## PubDate.last16.log1p FALSE NA NA 11.95698288
## PubDate.last2.log1p.ctg FALSE NA NA 15.06116892
## PubDate.last32.log1p FALSE NA NA 12.32340669
## PubDate.last32.log1p.ctg FALSE NA NA 15.92028658
## PubDate.last4.log1p.ctg FALSE NA NA 15.35402717
## PubDate.last8.log1p.ctg FALSE NA NA 15.48083492
## WordCount.nexp FALSE NA NA 1.00000000
## min max.Popular.fctr.N
## PubDate.day.minutes.poly.1 -0.02749464 0.02472285
## PubDate.day.minutes.poly.3 -0.04512497 0.05182988
## PubDate.day.minutes.poly.4 -0.01832740 0.06610094
## PubDate.day.minutes.poly.4.ctg -0.61188413 0.67700049
## PubDate.day.minutes.poly.5 -0.02450918 0.08344228
## PubDate.day.minutes.poly.5.ctg -0.71653445 0.56286316
## PubDate.juliandate 244.00000000 334.00000000
## PubDate.last16.log1p 0.00000000 11.95698288
## PubDate.last2.log1p.ctg 0.00000000 14.72999406
## PubDate.last32.log1p 0.00000000 12.21244232
## PubDate.last32.log1p.ctg 0.00000000 15.31921677
## PubDate.last4.log1p.ctg 0.00000000 14.65822450
## PubDate.last8.log1p.ctg 0.00000000 15.19975983
## WordCount.nexp 0.00000000 1.00000000
## max.Popular.fctr.Y min.Popular.fctr.N
## PubDate.day.minutes.poly.1 2.446866e-02 -0.02749464
## PubDate.day.minutes.poly.3 4.959703e-02 -0.04512497
## PubDate.day.minutes.poly.4 6.149053e-02 -0.01832740
## PubDate.day.minutes.poly.4.ctg 3.330545e-01 -0.61188413
## PubDate.day.minutes.poly.5 7.481472e-02 -0.02450918
## PubDate.day.minutes.poly.5.ctg 2.158524e-01 -0.71653445
## PubDate.juliandate 3.340000e+02 244.00000000
## PubDate.last16.log1p 1.187760e+01 0.00000000
## PubDate.last2.log1p.ctg 1.425351e+01 0.00000000
## PubDate.last32.log1p 1.221791e+01 0.00000000
## PubDate.last32.log1p.ctg 1.530033e+01 0.00000000
## PubDate.last4.log1p.ctg 1.473386e+01 0.00000000
## PubDate.last8.log1p.ctg 1.512285e+01 0.00000000
## WordCount.nexp 2.478752e-03 0.00000000
## min.Popular.fctr.Y
## PubDate.day.minutes.poly.1 -0.02745833
## PubDate.day.minutes.poly.3 -0.04482024
## PubDate.day.minutes.poly.4 -0.01821959
## PubDate.day.minutes.poly.4.ctg -0.28243219
## PubDate.day.minutes.poly.5 -0.02362780
## PubDate.day.minutes.poly.5.ctg -0.37058648
## PubDate.juliandate 244.00000000
## PubDate.last16.log1p 0.00000000
## PubDate.last2.log1p.ctg 0.00000000
## PubDate.last32.log1p 0.00000000
## PubDate.last32.log1p.ctg 0.00000000
## PubDate.last4.log1p.ctg 0.00000000
## PubDate.last8.log1p.ctg 0.00000000
## WordCount.nexp 0.00000000
## max.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1 0.02450498
## PubDate.day.minutes.poly.3 0.04991290
## PubDate.day.minutes.poly.4 0.06213811
## PubDate.day.minutes.poly.4.ctg 0.49508199
## PubDate.day.minutes.poly.5 0.07601554
## PubDate.day.minutes.poly.5.ctg 0.48962874
## PubDate.juliandate 334.00000000
## PubDate.last16.log1p 11.95698288
## PubDate.last2.log1p.ctg 14.25350675
## PubDate.last32.log1p 12.17624726
## PubDate.last32.log1p.ctg 15.27836472
## PubDate.last4.log1p.ctg 14.58914681
## PubDate.last8.log1p.ctg 15.10638373
## WordCount.nexp 1.00000000
## max.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1 0.02472285
## PubDate.day.minutes.poly.3 0.05182988
## PubDate.day.minutes.poly.4 0.06610094
## PubDate.day.minutes.poly.4.ctg 0.45727441
## PubDate.day.minutes.poly.5 0.08344228
## PubDate.day.minutes.poly.5.ctg 0.41093522
## PubDate.juliandate 334.00000000
## PubDate.last16.log1p 11.86831759
## PubDate.last2.log1p.ctg 13.93498461
## PubDate.last32.log1p 12.21791228
## PubDate.last32.log1p.ctg 15.16219287
## PubDate.last4.log1p.ctg 14.31378879
## PubDate.last8.log1p.ctg 14.89523017
## WordCount.nexp 1.00000000
## min.Popular.fctr.All.X..rcv.glmnet.N
## PubDate.day.minutes.poly.1 -0.02749464
## PubDate.day.minutes.poly.3 -0.04512497
## PubDate.day.minutes.poly.4 -0.01832685
## PubDate.day.minutes.poly.4.ctg -0.51296095
## PubDate.day.minutes.poly.5 -0.02450918
## PubDate.day.minutes.poly.5.ctg -0.45711886
## PubDate.juliandate 244.00000000
## PubDate.last16.log1p 0.00000000
## PubDate.last2.log1p.ctg 0.00000000
## PubDate.last32.log1p 0.00000000
## PubDate.last32.log1p.ctg 0.00000000
## PubDate.last4.log1p.ctg 0.00000000
## PubDate.last8.log1p.ctg 0.00000000
## WordCount.nexp 0.00000000
## min.Popular.fctr.All.X..rcv.glmnet.Y
## PubDate.day.minutes.poly.1 -0.02745833
## PubDate.day.minutes.poly.3 -0.04482024
## PubDate.day.minutes.poly.4 -0.01824013
## PubDate.day.minutes.poly.4.ctg -0.40164220
## PubDate.day.minutes.poly.5 -0.02362780
## PubDate.day.minutes.poly.5.ctg -0.54313226
## PubDate.juliandate 244.00000000
## PubDate.last16.log1p 0.00000000
## PubDate.last2.log1p.ctg 0.00000000
## PubDate.last32.log1p 0.00000000
## PubDate.last32.log1p.ctg 0.00000000
## PubDate.last4.log1p.ctg 0.00000000
## PubDate.last8.log1p.ctg 0.00000000
## WordCount.nexp 0.00000000
## max.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1 0.02432341
## PubDate.day.minutes.poly.3 0.04834381
## PubDate.day.minutes.poly.4 0.05893666
## PubDate.day.minutes.poly.4.ctg 0.63819571
## PubDate.day.minutes.poly.5 0.07011504
## PubDate.day.minutes.poly.5.ctg 0.45824974
## PubDate.juliandate 365.00000000
## PubDate.last16.log1p 11.85180908
## PubDate.last2.log1p.ctg 15.06116892
## PubDate.last32.log1p 12.32340669
## PubDate.last32.log1p.ctg 15.92028658
## PubDate.last4.log1p.ctg 15.35402717
## PubDate.last8.log1p.ctg 15.48083492
## WordCount.nexp 1.00000000
## max.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1 0.02475916
## PubDate.day.minutes.poly.3 0.05215301
## PubDate.day.minutes.poly.4 0.06677441
## PubDate.day.minutes.poly.4.ctg 0.38235412
## PubDate.day.minutes.poly.5 0.08471756
## PubDate.day.minutes.poly.5.ctg 0.42244492
## PubDate.juliandate 365.00000000
## PubDate.last16.log1p 11.88113167
## PubDate.last2.log1p.ctg 14.77515997
## PubDate.last32.log1p 12.30973422
## PubDate.last32.log1p.ctg 15.91937187
## PubDate.last4.log1p.ctg 15.34357419
## PubDate.last8.log1p.ctg 15.46616891
## WordCount.nexp 0.01831564
## min.Popular.fctr.Final..rcv.glmnet.N
## PubDate.day.minutes.poly.1 -0.02749464
## PubDate.day.minutes.poly.3 -0.04512497
## PubDate.day.minutes.poly.4 -0.01832268
## PubDate.day.minutes.poly.4.ctg -0.23960642
## PubDate.day.minutes.poly.5 -0.02450918
## PubDate.day.minutes.poly.5.ctg -0.35475727
## PubDate.juliandate 335.00000000
## PubDate.last16.log1p 8.10167775
## PubDate.last2.log1p.ctg 0.00000000
## PubDate.last32.log1p 8.86290830
## PubDate.last32.log1p.ctg 0.00000000
## PubDate.last4.log1p.ctg 0.00000000
## PubDate.last8.log1p.ctg 0.00000000
## WordCount.nexp 0.00000000
## min.Popular.fctr.Final..rcv.glmnet.Y
## PubDate.day.minutes.poly.1 -0.02745833
## PubDate.day.minutes.poly.3 -0.04482024
## PubDate.day.minutes.poly.4 -0.01820339
## PubDate.day.minutes.poly.4.ctg -0.20524473
## PubDate.day.minutes.poly.5 -0.02362780
## PubDate.day.minutes.poly.5.ctg -0.28096322
## PubDate.juliandate 335.00000000
## PubDate.last16.log1p 8.09223941
## PubDate.last2.log1p.ctg 0.00000000
## PubDate.last32.log1p 8.83579237
## PubDate.last32.log1p.ctg 0.00000000
## PubDate.last4.log1p.ctg 0.00000000
## PubDate.last8.log1p.ctg 0.00000000
## WordCount.nexp 0.00000000
## [1] "newobs total range outliers: 1870"
#stop("here"); glb2Sav(); sav_obsout_df <- obsout_df; all.equal(sav_obsout_df, obsout_df); obsout_df <- sav_obsout_df
# Stack previously generated prediction files (glbOutStackFnames) onto
#   obsout_df, keeping the current predictions whenever an id occurs in both.
# This does not work for classification since AUC distribution might be
#   different for different models
#   -> Run glm on .prob from this glb_fin_mdl_id & .prob from stacked file &
#      stack condition as a feature
if (!is.null(glbOutStackFnames)) {
    for (fname in glbOutStackFnames) {
        print(sprintf("Stacking file %s to prediction output...", fname))
        stackDf <- read.csv(fname)
        # Without the id column the de-duplication below cannot work
        if (!(glb_id_var %in% names(stackDf)))
            stop(sprintf("Stacking file %s does not contain id column %s",
                         fname, glb_id_var))
        # Only rows whose id is not already in obsout_df are appended
        isNewId <- !(stackDf[[glb_id_var]] %in% obsout_df[[glb_id_var]])
        obsout_df <- rbind(obsout_df, stackDf[isNewId, , drop = FALSE])
        # Sort by id; base subsetting keeps the old row names, so reset them
        obsout_df <- obsout_df[order(obsout_df[[glb_id_var]]), , drop = FALSE]
        row.names(obsout_df) <- NULL
        if (anyDuplicated(obsout_df[[glb_id_var]]) > 0)
            stop("Potential dups in stacked prediction output")
    }
}
# Write the prediction output to "<glb_out_pfx>out.csv", with no quoting and
#   no row names column.
out_fname <- paste0(glb_out_pfx, "out.csv")
write.csv(x = obsout_df, file = out_fname, row.names = FALSE, quote = FALSE)
#cat(" ", "\n", file=submit_fn, append=TRUE)
# print(orderBy(~ -max.auc.OOB, glb_models_df[, c("model_id",
# "max.auc.OOB", "max.Accuracy.OOB")]))
# For each text feature, print diagnostics on its post-stem-words terms:
#   the full terms table, the observations containing the last listed term,
#   the number of terms with freq == 1, and the freq == 1 terms that match
#   any of the top-n terms.
for (txt_var in glbFeatsText) {
    # Look the terms table up once; it is used by every step below
    tmp_df <- glb_post_stem_words_terms_df_lst[[txt_var]]

    # Print post-stem-words but need post-stop-words for debugging ?
    print(sprintf(" All post-stem-words TfIDf terms for %s:", txt_var))
    myprint_df(tmp_df)
    TfIdf_mtrx <- glb_post_stem_words_TfIdf_mtrx_lst[[txt_var]]
    # Observations with a positive TfIdf weight for the last term in the table
    print(glbObsAll[
        which(TfIdf_mtrx[, tail(tmp_df, 1)$pos] > 0),
        c(glb_id_var, glbFeatsText)])
    tmp_freq1_df <- subset(tmp_df, freq == 1)
    print(nrow(tmp_freq1_df))
    #print(glbObsAll[which(TfIdf_mtrx[, 207] > 0), c(glb_id_var, glbFeatsText)])
    #unlist(strsplit(glbObsAll[2157, "description"], ""))
    #glbObsAll[2442, c(glb_id_var, glbFeatsText)]
    #TfIdf_mtrx[2442, TfIdf_mtrx[2442, ] > 0]

    print(sprintf(" Top_n post_stem_words TfIDf terms for %s:", txt_var))
    # head() instead of 1:n: n == 0 gives no terms (1:0 would give c(1, 0)) and
    #   n > nrow gives all terms (1:n would pad with NA, i.e. "NA" in the regex)
    top_n_vctr <- head(tmp_df$term, glb_txt_top_n[[txt_var]])
    # An empty alternation is the pattern "", which matches every term;
    #   flag nothing in that case
    tmp_freq1_df$top_n <- if (length(top_n_vctr) > 0)
        grepl(paste0(top_n_vctr, collapse="|"), tmp_freq1_df$term) else
        rep(FALSE, nrow(tmp_freq1_df))
    print(subset(tmp_freq1_df, top_n == TRUE))
}
# For binomial classification, print the OOB-optimal probability threshold
#   of the selected model.
# NOTE(review): the lookup by `model_id` printed numeric(0) in this report, and
#   the models table displayed by get_dsp_models_df() keys models by `id`; so
#   fall back to `id` when glb_models_df has no `model_id` column. Confirm the
#   key column name against the code that builds glb_models_df.
if (glb_is_classification && glb_is_binomial) {
    mdlIdClmn <- if ("model_id" %in% names(glb_models_df)) "model_id" else "id"
    print(glb_models_df[glb_models_df[[mdlIdClmn]] == glb_sel_mdl_id,
                        "opt.prob.threshold.OOB"])
}
## numeric(0)
# Report the ids of the selected and the final model
print(sprintf(fmt = "glb_sel_mdl_id: %s", glb_sel_mdl_id))
## [1] "glb_sel_mdl_id: All.X##rcv#glmnet"
print(sprintf(fmt = "glb_fin_mdl_id: %s", glb_fin_mdl_id))
## [1] "glb_fin_mdl_id: Final##rcv#glmnet"
# Display the models comparison table
get_dsp_models_df()
## [1] "Cross Validation issues:"
## Warning in get_dsp_models_df(): Cross Validation issues:
## MFO###myMFO_classfr Random###myrandom_classfr
## 0 0
## Max.cor.Y.rcv.1X1###glmnet Max.cor.Y.rcv.1X1.cp.0###rpart
## 0 0
## id
## Max.cor.Y##rcv#rpart Max.cor.Y##rcv#rpart
## Low.cor.X##rcv#glmnet Low.cor.X##rcv#glmnet
## Max.cor.Y.TmSrs.poly##rcv#glmnet Max.cor.Y.TmSrs.poly##rcv#glmnet
## All.X##rcv#glmnet All.X##rcv#glmnet
## Max.cor.Y.rcv.1X1.cp.0###rpart Max.cor.Y.rcv.1X1.cp.0###rpart
## Interact.High.cor.Y##rcv#glmnet Interact.High.cor.Y##rcv#glmnet
## Max.cor.Y.rcv.1X1###glmnet Max.cor.Y.rcv.1X1###glmnet
## Max.cor.Y.rcv.5X3##rcv#glmnet Max.cor.Y.rcv.5X3##rcv#glmnet
## Max.cor.Y.rcv.5X1##rcv#glmnet Max.cor.Y.rcv.5X1##rcv#glmnet
## Max.cor.Y.rcv.5X5##rcv#glmnet Max.cor.Y.rcv.5X5##rcv#glmnet
## Max.cor.Y.rcv.3X1##rcv#glmnet Max.cor.Y.rcv.3X1##rcv#glmnet
## Max.cor.Y.rcv.3X3##rcv#glmnet Max.cor.Y.rcv.3X3##rcv#glmnet
## Max.cor.Y.rcv.3X5##rcv#glmnet Max.cor.Y.rcv.3X5##rcv#glmnet
## MFO###myMFO_classfr MFO###myMFO_classfr
## Random###myrandom_classfr Random###myrandom_classfr
## Final##rcv#glmnet Final##rcv#glmnet
## max.Accuracy.OOB max.AUCROCR.OOB
## Max.cor.Y##rcv#rpart 0.8200231 0.5892132
## Low.cor.X##rcv#glmnet 0.7783565 0.8052766
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.7754630 0.8049472
## All.X##rcv#glmnet 0.7731481 0.8075492
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.7673611 0.7773858
## Interact.High.cor.Y##rcv#glmnet 0.7656250 0.8140971
## Max.cor.Y.rcv.1X1###glmnet 0.7604167 0.8116126
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.7604167 0.8114863
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.7604167 0.8114863
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.7604167 0.8114863
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.7575231 0.8067975
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.7575231 0.8067975
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.7575231 0.8067975
## MFO###myMFO_classfr 0.1331019 0.5000000
## Random###myrandom_classfr 0.1331019 0.4857956
## Final##rcv#glmnet NA NA
## max.AUCpROC.OOB max.Accuracy.fit
## Max.cor.Y##rcv#rpart 0.5870523 0.9296422
## Low.cor.X##rcv#glmnet 0.5917252 0.9276303
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.5965780 0.9323484
## All.X##rcv#glmnet 0.5930603 0.9270754
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.6174697 0.9381765
## Interact.High.cor.Y##rcv#glmnet 0.6009259 0.9315850
## Max.cor.Y.rcv.1X1###glmnet 0.5962443 0.9329725
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.5962443 0.9333905
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.5962443 0.9331818
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.5962443 0.9331816
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.5962443 0.9335973
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.5962443 0.9333193
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.5962443 0.9332218
## MFO###myMFO_classfr 0.5000000 0.1796420
## Random###myrandom_classfr 0.5125675 0.1796420
## Final##rcv#glmnet NA 0.9052358
## opt.prob.threshold.fit
## Max.cor.Y##rcv#rpart 0.6
## Low.cor.X##rcv#glmnet 0.2
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.4
## All.X##rcv#glmnet 0.3
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.4
## Interact.High.cor.Y##rcv#glmnet 0.4
## Max.cor.Y.rcv.1X1###glmnet 0.5
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.5
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.5
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.5
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.4
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.4
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.4
## MFO###myMFO_classfr 0.1
## Random###myrandom_classfr 0.1
## Final##rcv#glmnet 0.3
## opt.prob.threshold.OOB
## Max.cor.Y##rcv#rpart 0.6
## Low.cor.X##rcv#glmnet 0.1
## Max.cor.Y.TmSrs.poly##rcv#glmnet 0.1
## All.X##rcv#glmnet 0.1
## Max.cor.Y.rcv.1X1.cp.0###rpart 0.1
## Interact.High.cor.Y##rcv#glmnet 0.1
## Max.cor.Y.rcv.1X1###glmnet 0.1
## Max.cor.Y.rcv.5X3##rcv#glmnet 0.1
## Max.cor.Y.rcv.5X1##rcv#glmnet 0.1
## Max.cor.Y.rcv.5X5##rcv#glmnet 0.1
## Max.cor.Y.rcv.3X1##rcv#glmnet 0.1
## Max.cor.Y.rcv.3X3##rcv#glmnet 0.1
## Max.cor.Y.rcv.3X5##rcv#glmnet 0.1
## MFO###myMFO_classfr 0.1
## Random###myrandom_classfr 0.1
## Final##rcv#glmnet NA
# Regression reporting:
#   1. OOB RMSE of the selected model
#   2. per-category OOB error stats merged into glbLvlCategory
#   3. prediction stats of the final model on glbObsNew, when its response is
#      fully known
if (glb_is_regression) {
    # NOTE(review): fall back to `id` when glb_models_df has no `model_id`
    #   column (the models table displayed in this report keys models by `id`)
    mdlIdClmn <- if ("model_id" %in% names(glb_models_df)) "model_id" else "id"
    print(sprintf("%s OOB RMSE: %0.4f", glb_sel_mdl_id,
                  glb_models_df[glb_models_df[[mdlIdClmn]] == glb_sel_mdl_id,
                                "min.RMSE.OOB"]))

    if (!is.null(glbFeatsCategory)) {
        tmp_OOBobs_df <- glbObsOOB[, c(glbFeatsCategory, glb_rsp_var,
                                       predct_error_var_name)]
        # The prediction error column is the last one selected above
        names(tmp_OOBobs_df)[length(names(tmp_OOBobs_df))] <- "error.abs.OOB"

        # Per-category sums and counts; the response column comes from
        #   glb_rsp_var rather than a hard-coded name, and every output column
        #   is named explicitly instead of being renamed by position.
        # aggregate() drops observations whose category is NA.
        sOOB_ctgry_df <- aggregate(
            data.frame(rsp.abs.sum     = abs(tmp_OOBobs_df[[glb_rsp_var]]),
                       err.abs.OOB.sum = tmp_OOBobs_df[["error.abs.OOB"]],
                       .n.OOB          = rep(1L, nrow(tmp_OOBobs_df))),
            by = tmp_OOBobs_df[glbFeatsCategory], FUN = sum)
        sOOB_ctgry_df[["err.abs.OOB.mean"]] <-
            sOOB_ctgry_df[["err.abs.OOB.sum"]] / sOOB_ctgry_df[[".n.OOB"]]
        names(sOOB_ctgry_df)[names(sOOB_ctgry_df) == "rsp.abs.sum"] <-
            paste0(glb_rsp_var, ".abs.OOB.sum")

        #intersect(names(glbLvlCategory), names(sOOB_ctgry_df))
        # Merges on all columns the two data frames have in common
        glbLvlCategory <- merge(glbLvlCategory, sOOB_ctgry_df, all=TRUE)
        print(orderBy(~-err.abs.OOB.mean, glbLvlCategory))
    }

    # Only possible when the new observations carry a complete response
    if ((glb_rsp_var %in% names(glbObsNew)) &&
        !(any(is.na(glbObsNew[, glb_rsp_var])))) {
        pred_stats_df <-
            mypredict_mdl(mdl = glb_models_lst[[glb_fin_mdl_id]],
                          df = glbObsNew,
                          rsp_var = glb_rsp_var,
                          label = "new",
                          model_summaryFunction = glb_sel_mdl$control$summaryFunction,
                          model_metric = glb_sel_mdl$metric,
                          model_metric_maximize = glb_sel_mdl$maximize,
                          ret_type = "stats")
        print(sprintf("%s prediction stats for glbObsNew:", glb_fin_mdl_id))
        print(pred_stats_df)
    }
}
# Classification reporting:
#   1. OOB confusion matrix of the selected model
#   2. per-category stats of the final model on glbObsTrn & glbObsNew merged
#      into glbLvlCategory, printed by descending mean absolute error,
#      followed by the column totals
#   3. confusion matrix of the final model on glbObsNew, when its response is
#      fully known
if (glb_is_classification) {
    print(sprintf("%s OOB confusion matrix & accuracy: ", glb_sel_mdl_id))
    # Transposed so that rows are Reference and columns are Prediction
    print(t(confusionMatrix(glbObsOOB[, mygetPredictIds(glb_rsp_var, glb_sel_mdl_id)$value],
                            glbObsOOB[, glb_rsp_var])$table))

    if (!is.null(glbFeatsCategory)) {
        glbLvlCategory <- merge(glbLvlCategory,
            myget_category_stats(obs_df = glbObsTrn, mdl_id = glb_fin_mdl_id,
                                 label = "trn"),
            by = glbFeatsCategory, all = TRUE)
        row.names(glbLvlCategory) <- glbLvlCategory[, glbFeatsCategory]
        glbLvlCategory <- merge(glbLvlCategory,
            myget_category_stats(obs_df = glbObsNew, mdl_id = glb_fin_mdl_id,
                                 label = "new"),
            by = glbFeatsCategory, all = TRUE)
        row.names(glbLvlCategory) <- glbLvlCategory[, glbFeatsCategory]

        # Order the stats columns by name, after moving the sample label
        #   (OOB/fit/new/trn) of the err.abs.* columns behind the statistic so
        #   that columns of the same statistic end up next to each other
        clmnOrder <- sort(setdiff(names(glbLvlCategory), glbFeatsCategory))
        names(clmnOrder) <- gsub("^err\\.abs\\.(.{3})\\.(.+)", "err\\.abs\\.\\2\\.\\1", clmnOrder)
        clmnOrder <- clmnOrder[sort(names(clmnOrder))]
        glbLvlCategory <- glbLvlCategory[, c(glbFeatsCategory, clmnOrder)]

        # Stats columns only; select by exact name, since -grep() treats the
        #   category name as a regex and selects zero columns on no match
        ctgryStatsDf <- glbLvlCategory[, setdiff(names(glbLvlCategory),
                                                 glbFeatsCategory),
                                       drop = FALSE]
        if (any(grepl("OOB", glbMdlMetricsEval)))
            print(orderBy(~-err.abs.OOB.mean, ctgryStatsDf)) else
            print(orderBy(~-err.abs.fit.mean, ctgryStatsDf))

        # Column totals: a category missing from one sample (NA) no longer
        #   turns the whole total into NA; columns without any value stay NA
        ctgryTotals <- colSums(ctgryStatsDf, na.rm = TRUE)
        ctgryTotals[colSums(!is.na(ctgryStatsDf)) == 0] <- NA
        print(ctgryTotals)
    }

    # Only possible when the new observations carry a complete response
    if ((glb_rsp_var %in% names(glbObsNew)) &&
        !(any(is.na(glbObsNew[, glb_rsp_var])))) {
        print(sprintf("%s new confusion matrix & accuracy: ", glb_fin_mdl_id))
        print(t(confusionMatrix(glbObsNew[, mygetPredictIds(glb_rsp_var, glb_fin_mdl_id)$value],
                                glbObsNew[, glb_rsp_var])$table))
    }
}
## [1] "All.X##rcv#glmnet OOB confusion matrix & accuracy: "
## Prediction
## Reference N Y
## N 1180 318
## Y 74 156
## .freqRatio.Fit .freqRatio.OOB
## #Opinion#ThePublicEditor 0.003330558 0.0023148148
## OpEd#Opinion# 0.090965862 0.0515046296
## Styles#U.S.# 0.026436303 0.0289351852
## Science#Health# 0.030807660 0.0277777778
## Business#Crosswords/Games# 0.021856786 0.0104166667
## Business#Technology# 0.044338052 0.0729166667
## ## 0.190049958 0.2146990741
## Business#BusinessDay#Dealbook 0.130932556 0.1869212963
## Culture#Arts# 0.101998335 0.1070601852
## Metro#N.Y./Region# 0.026644463 0.0405092593
## #Opinion#RoomForDebate 0.008742714 0.0115740741
## Styles##Fashion 0.021648626 0.0086805556
## Business#BusinessDay#SmallBusiness 0.020815987 0.0231481481
## Travel#Travel# 0.017277269 0.0196759259
## Foreign#World#AsiaPacific 0.031223980 0.0306712963
## #Multimedia# 0.019150708 0.0283564815
## myOther 0.006869276 0.0028935185
## TStyle## 0.129683597 0.0584490741
## Culture## NA 0.0005787037
## Foreign#World# 0.026644463 0.0254629630
## #U.S.#Education 0.050582848 0.0474537037
## .freqRatio.Tst .n.Fit .n.New.N .n.New.Y
## #Opinion#ThePublicEditor 0.005347594 16 NA 10
## OpEd#Opinion# 0.087700535 437 NA 164
## Styles#U.S.# 0.032620321 127 NA 61
## Science#Health# 0.030481283 148 NA 57
## Business#Crosswords/Games# 0.022459893 105 NA 42
## Business#Technology# 0.060962567 213 35 79
## ## 0.182887701 913 262 80
## Business#BusinessDay#Dealbook 0.162566845 629 198 106
## Culture#Arts# 0.093048128 490 161 13
## Metro#N.Y./Region# 0.035828877 128 43 24
## #Opinion#RoomForDebate 0.010695187 42 17 3
## Styles##Fashion 0.008021390 104 15 NA
## Business#BusinessDay#SmallBusiness 0.021925134 100 29 12
## Travel#Travel# 0.018716578 83 35 NA
## Foreign#World#AsiaPacific 0.029946524 150 48 8
## #Multimedia# 0.027807487 92 48 4
## myOther 0.002673797 33 4 1
## TStyle## 0.056149733 623 104 1
## Culture## 0.037433155 NA 51 19
## Foreign#World# 0.025133690 128 46 1
## #U.S.#Education 0.047593583 243 85 4
## .n.OOB .n.Trn.N .n.Trn.Y .n.Tst .n.fit
## #Opinion#ThePublicEditor 4 4 16 10 16
## OpEd#Opinion# 89 117 409 164 437
## Styles#U.S.# 50 77 100 61 127
## Science#Health# 48 74 122 57 148
## Business#Crosswords/Games# 18 20 103 42 105
## Business#Technology# 126 288 51 114 213
## ## 371 1169 115 342 913
## Business#BusinessDay#Dealbook 323 864 88 304 629
## Culture#Arts# 185 625 50 174 490
## Metro#N.Y./Region# 70 181 17 67 128
## #Opinion#RoomForDebate 20 61 1 20 42
## Styles##Fashion 15 118 1 15 104
## Business#BusinessDay#SmallBusiness 40 135 5 41 100
## Travel#Travel# 34 116 1 35 83
## Foreign#World#AsiaPacific 53 200 3 56 150
## #Multimedia# 49 139 2 52 92
## myOther 5 38 NA 5 33
## TStyle## 101 715 9 105 623
## Culture## 1 1 NA 70 NA
## Foreign#World# 44 172 NA 47 128
## #U.S.#Education 82 325 NA 89 243
## .n.new .n.trn err.abs.OOB.mean
## #Opinion#ThePublicEditor 10 20 0.55603957
## OpEd#Opinion# 164 526 0.55499460
## Styles#U.S.# 61 177 0.52206539
## Science#Health# 57 196 0.50721369
## Business#Crosswords/Games# 42 123 0.49608626
## Business#Technology# 114 339 0.22509451
## ## 342 1284 0.19081551
## Business#BusinessDay#Dealbook 304 952 0.19052186
## Culture#Arts# 174 675 0.17637527
## Metro#N.Y./Region# 67 198 0.16909622
## #Opinion#RoomForDebate 20 62 0.14589819
## Styles##Fashion 15 119 0.12670668
## Business#BusinessDay#SmallBusiness 41 140 0.12057339
## Travel#Travel# 35 117 0.08657282
## Foreign#World#AsiaPacific 56 203 0.08120408
## #Multimedia# 52 141 0.07846883
## myOther 5 38 0.07708180
## TStyle## 105 724 0.07250498
## Culture## 70 1 0.06292232
## Foreign#World# 47 172 0.05494285
## #U.S.#Education 89 325 0.05198469
## err.abs.fit.mean err.abs.new.mean
## #Opinion#ThePublicEditor 0.37138768 NA
## OpEd#Opinion# 0.29419868 NA
## Styles#U.S.# 0.45550425 NA
## Science#Health# 0.41611329 NA
## Business#Crosswords/Games# 0.25533785 NA
## Business#Technology# 0.20182570 NA
## ## 0.11811213 NA
## Business#BusinessDay#Dealbook 0.13607671 NA
## Culture#Arts# 0.10729811 NA
## Metro#N.Y./Region# 0.12825825 NA
## #Opinion#RoomForDebate 0.10911328 NA
## Styles##Fashion 0.06654499 NA
## Business#BusinessDay#SmallBusiness 0.10835795 NA
## Travel#Travel# 0.06045359 NA
## Foreign#World#AsiaPacific 0.08085077 NA
## #Multimedia# 0.07051503 NA
## myOther 0.07641022 NA
## TStyle## 0.04581636 NA
## Culture## NA NA
## Foreign#World# 0.05339482 NA
## #U.S.#Education 0.04531883 NA
## err.abs.trn.mean err.abs.OOB.sum
## #Opinion#ThePublicEditor 0.32445159 2.22415827
## OpEd#Opinion# 0.34333510 49.39451932
## Styles#U.S.# 0.46495322 26.10326970
## Science#Health# 0.40679657 24.34625696
## Business#Crosswords/Games# 0.26458216 8.92955276
## Business#Technology# 0.22393928 28.36190775
## ## 0.14015202 70.79255480
## Business#BusinessDay#Dealbook 0.15664922 61.53856052
## Culture#Arts# 0.11481400 32.62942406
## Metro#N.Y./Region# 0.14356153 11.83673574
## #Opinion#RoomForDebate 0.09358368 2.91796380
## Styles##Fashion 0.05980978 1.90060021
## Business#BusinessDay#SmallBusiness 0.10245523 4.82293575
## Travel#Travel# 0.05451914 2.94347588
## Foreign#World#AsiaPacific 0.06046886 4.30381605
## #Multimedia# 0.06330378 3.84497275
## myOther 0.07191117 0.38540899
## TStyle## 0.03799161 7.32300251
## Culture## 0.05950658 0.06292232
## Foreign#World# 0.03429956 2.41748545
## #U.S.#Education 0.02627292 4.26274429
## err.abs.fit.sum err.abs.new.sum
## #Opinion#ThePublicEditor 5.942203 NA
## OpEd#Opinion# 128.564823 NA
## Styles#U.S.# 57.849040 NA
## Science#Health# 61.584766 NA
## Business#Crosswords/Games# 26.810474 NA
## Business#Technology# 42.988874 NA
## ## 107.836370 NA
## Business#BusinessDay#Dealbook 85.592251 NA
## Culture#Arts# 52.576076 NA
## Metro#N.Y./Region# 16.417056 NA
## #Opinion#RoomForDebate 4.582758 NA
## Styles##Fashion 6.920679 NA
## Business#BusinessDay#SmallBusiness 10.835795 NA
## Travel#Travel# 5.017648 NA
## Foreign#World#AsiaPacific 12.127615 NA
## #Multimedia# 6.487383 NA
## myOther 2.521537 NA
## TStyle## 28.543592 NA
## Culture## NA NA
## Foreign#World# 6.834537 NA
## #U.S.#Education 11.012475 NA
## err.abs.trn.sum
## #Opinion#ThePublicEditor 6.48903189
## OpEd#Opinion# 180.59426339
## Styles#U.S.# 82.29671998
## Science#Health# 79.73212861
## Business#Crosswords/Games# 32.54360605
## Business#Technology# 75.91541455
## ## 179.95519815
## Business#BusinessDay#Dealbook 149.13005939
## Culture#Arts# 77.49945086
## Metro#N.Y./Region# 28.42518264
## #Opinion#RoomForDebate 5.80218818
## Styles##Fashion 7.11736340
## Business#BusinessDay#SmallBusiness 14.34373175
## Travel#Travel# 6.37873952
## Foreign#World#AsiaPacific 12.27517856
## #Multimedia# 8.92583287
## myOther 2.73262437
## TStyle## 27.50592861
## Culture## 0.05950658
## Foreign#World# 5.89952356
## #U.S.#Education 8.53869980
## .freqRatio.Fit .freqRatio.OOB .freqRatio.Tst .n.Fit
## NA 1.000000 1.000000 NA
## .n.New.N .n.New.Y .n.OOB .n.Trn.N
## NA NA 1728.000000 5439.000000
## .n.Trn.Y .n.Tst .n.fit .n.new
## NA 1870.000000 NA 1870.000000
## .n.trn err.abs.OOB.mean err.abs.fit.mean err.abs.new.mean
## 6532.000000 4.547164 NA NA
## err.abs.trn.mean err.abs.OOB.sum err.abs.fit.sum err.abs.new.sum
## 3.247357 351.342268 NA NA
## err.abs.trn.sum
## 992.160373
# Print the features whose `imp` exceeds 10, in descending order of the
#   selected model's importance column ("<glb_sel_mdl_id>.imp").
# "#" is replaced by "_" in the column names and in the model id so that the
#   importance column can be referenced inside a formula.
tmpFeatsImp <- setNames(glb_featsimp_df,
                        gsub("#", "_", names(glb_featsimp_df)))
selMdlImpClmn <- paste0(gsub("#", "_", glb_sel_mdl_id), ".imp")
print(orderBy(as.formula(paste0("~ -", selMdlImpClmn)),
              subset(tmpFeatsImp, imp > 10)))
## All.X__rcv_glmnet.imp
## PubDate.day.minutes.poly.1 100.00000
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg 88.15594
## PubDate.day.minutes.poly.2 70.97396
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg 66.84933
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg 57.58351
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg 55.69412
## PubDate.day.minutes.poly.4 55.19343
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.5.ctg 53.46899
## NDSSName.my.fctrScience#Health# 51.70030
## NDSSName.my.fctrOpEd#Opinion# 51.25612
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg 50.91035
## NDSSName.my.fctr#Opinion#ThePublicEditor 50.37025
## NDSSName.my.fctrBusiness#Crosswords/Games# 50.15826
## NDSSName.my.fctrStyles#U.S.# 50.13310
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg 47.89992
## WordCount.log1p 45.64733
## PubDate.wkend 45.38153
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg 45.00131
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg 44.98345
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg 44.94701
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg 44.94176
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg 44.93139
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg 44.88522
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg 44.88194
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg 44.87622
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg 44.82052
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg 44.81961
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg 44.81554
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg 44.79591
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg 44.79069
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg 44.76793
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg 44.76367
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg 44.76239
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg 44.76087
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg 44.74633
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg 44.72830
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg 44.71210
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg 44.69504
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg 44.68497
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg 44.67727
## NDSSName.my.fctrBusiness#Technology# 44.64238
## WordCount.root2 44.63210
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg 44.61881
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg 44.59532
## PubDate.hour.fctr(15.3,23] 44.58457
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg 44.50280
## PubDate.last2.log1p 44.47824
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg 44.46899
## PubDate.last4.log1p 44.45424
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg 44.45238
## PubDate.day.minutes.poly.3 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.3.ctg 44.42049
## PubDate.last16.log1p 44.42049
## NDSSName.my.fctr##:PubDate.last32.log1p.ctg 44.42049
## PubDate.wkday.fctr1 44.42049
## NDSSName.my.fctr##:PubDate.last16.log1p.ctg 44.42049
## .rnorm 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctr##:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#Technology#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrCulture## 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrCulture##:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts# 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region# 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrTravel#Travel# 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last32.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.1.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.3.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.5.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last32.log1p.ctg 44.42049
## PubDate.date.fctr(13,19] 44.42049
## PubDate.date.fctr(19,25] 44.42049
## PubDate.date.fctr(25,31] 44.42049
## PubDate.date.fctr(7,13] 44.42049
## PubDate.day.minutes.poly.5 44.42049
## PubDate.hour.fctr(7.67,15.3] 44.42049
## PubDate.last32.log1p 44.42049
## PubDate.last8.log1p 44.42049
## PubDate.minute.fctr(14.8,29.5] 44.42049
## PubDate.minute.fctr(44.2,59.1] 44.42049
## PubDate.month.fctr10 44.42049
## PubDate.month.fctr11 44.42049
## PubDate.month.fctr12 44.42049
## PubDate.second.fctr(14.8,29.5] 44.42049
## PubDate.second.fctr(29.5,44.2] 44.42049
## PubDate.second.fctr(44.2,59.1] 44.42049
## PubDate.wkday.fctr2 44.42049
## PubDate.wkday.fctr3 44.42049
## PubDate.wkday.fctr4 44.42049
## PubDate.wkday.fctr6 44.42049
## WordCount.nexp 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last16.log1p.ctg 44.42049
## PubDate.juliandate 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last16.log1p.ctg 44.42049
## NDSSName.my.fctrTravel#Travel#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrmyOther:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrForeign#World#:PubDate.last8.log1p.ctg 44.42049
## NDSSName.my.fctr#Multimedia#:PubDate.last2.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last4.log1p.ctg 44.42049
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last2.log1p.ctg 44.42049
## PubDate.minute.fctr(29.5,44.2] 44.42049
## NDSSName.my.fctr#Multimedia# 44.42049
## NDSSName.my.fctrmyOther 44.42049
## NDSSName.my.fctrForeign#World# 44.42049
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.4.ctg 44.42049
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.2.ctg 44.42049
## NDSSName.my.fctrStyles##Fashion:PubDate.last32.log1p.ctg 44.41767
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg 44.41247
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg 44.40744
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg 44.40155
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg 44.40131
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg 44.40130
## PubDate.wkday.fctr5 44.39981
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg 44.39875
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg 44.39148
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg 44.39068
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg 44.38427
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg 44.38266
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg 44.38183
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg 44.38058
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg 44.37617
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg 44.37363
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg 44.37301
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg 44.36295
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg 44.36011
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg 44.35927
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg 44.35398
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg 44.34877
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg 44.34392
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg 44.33922
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg 44.33746
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg 44.33234
## NDSSName.my.fctrStyles##Fashion 44.02222
## NDSSName.my.fctrForeign#World#AsiaPacific 44.00596
## NDSSName.my.fctr#U.S.#Education 43.89767
## NDSSName.my.fctrTStyle## 43.56802
## NDSSName.my.fctr#Opinion#RoomForDebate 43.41965
## imp
## PubDate.day.minutes.poly.1 100.00000
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg 76.35014
## PubDate.day.minutes.poly.2 94.99861
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg 79.92646
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg 68.23193
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg 66.33201
## PubDate.day.minutes.poly.4 80.32362
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrScience#Health# 67.75545
## NDSSName.my.fctrOpEd#Opinion# 66.72750
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor 68.43180
## NDSSName.my.fctrBusiness#Crosswords/Games# 65.82644
## NDSSName.my.fctrStyles#U.S.# 65.72658
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg 69.33717
## WordCount.log1p 63.35579
## PubDate.wkend 63.40967
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg 62.71138
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg 62.60862
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg 62.63968
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg 62.69663
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg 62.63671
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg 62.70422
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg 62.63024
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg 62.60155
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg 62.49439
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg 62.63125
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg 62.51548
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg 62.64524
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg 62.55716
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg 62.48485
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg 62.59625
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg 62.52533
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg 62.55673
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg 62.53825
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg 62.46260
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg 62.49079
## NDSSName.my.fctrBusiness#Technology# 62.46064
## WordCount.root2 62.61255
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg 62.46064
## PubDate.hour.fctr(15.3,23] 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg 62.51309
## PubDate.last2.log1p 62.46192
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg 62.52431
## PubDate.last4.log1p 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg 62.49862
## PubDate.day.minutes.poly.3 69.56878
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.3.ctg 62.66614
## PubDate.last16.log1p 62.59083
## NDSSName.my.fctr##:PubDate.last32.log1p.ctg 62.49753
## PubDate.wkday.fctr1 62.47973
## NDSSName.my.fctr##:PubDate.last16.log1p.ctg 62.47326
## .rnorm 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture## 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts# 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region# 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrTravel#Travel# 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.last32.log1p.ctg 62.46064
## PubDate.date.fctr(13,19] 62.46064
## PubDate.date.fctr(19,25] 62.46064
## PubDate.date.fctr(25,31] 62.46064
## PubDate.date.fctr(7,13] 62.46064
## PubDate.day.minutes.poly.5 62.46064
## PubDate.hour.fctr(7.67,15.3] 62.46064
## PubDate.last32.log1p 62.46064
## PubDate.last8.log1p 62.46064
## PubDate.minute.fctr(14.8,29.5] 62.46064
## PubDate.minute.fctr(44.2,59.1] 62.46064
## PubDate.month.fctr10 62.46064
## PubDate.month.fctr11 62.46064
## PubDate.month.fctr12 62.46064
## PubDate.second.fctr(14.8,29.5] 62.46064
## PubDate.second.fctr(29.5,44.2] 62.46064
## PubDate.second.fctr(44.2,59.1] 62.46064
## PubDate.wkday.fctr2 62.46064
## PubDate.wkday.fctr3 62.46064
## PubDate.wkday.fctr4 62.46064
## PubDate.wkday.fctr6 62.46064
## WordCount.nexp 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last16.log1p.ctg 62.46061
## NDSSName.my.fctrmyOther:PubDate.last16.log1p.ctg 62.46034
## PubDate.juliandate 62.46014
## NDSSName.my.fctr#Multimedia#:PubDate.last4.log1p.ctg 62.46001
## NDSSName.my.fctrForeign#World#:PubDate.last2.log1p.ctg 62.45953
## NDSSName.my.fctrmyOther:PubDate.last8.log1p.ctg 62.45872
## NDSSName.my.fctrmyOther:PubDate.last2.log1p.ctg 62.45866
## NDSSName.my.fctrForeign#World#:PubDate.last16.log1p.ctg 62.45805
## NDSSName.my.fctrTravel#Travel#:PubDate.last4.log1p.ctg 62.45737
## NDSSName.my.fctrmyOther:PubDate.last4.log1p.ctg 62.44734
## NDSSName.my.fctrForeign#World#:PubDate.last4.log1p.ctg 62.44664
## NDSSName.my.fctrForeign#World#:PubDate.last8.log1p.ctg 62.44232
## NDSSName.my.fctr#Multimedia#:PubDate.last2.log1p.ctg 62.44227
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last4.log1p.ctg 62.43957
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last2.log1p.ctg 62.41173
## PubDate.minute.fctr(29.5,44.2] 62.31764
## NDSSName.my.fctr#Multimedia# 62.18062
## NDSSName.my.fctrmyOther 62.14866
## NDSSName.my.fctrForeign#World# 62.11162
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.4.ctg 60.96200
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.2.ctg 59.93848
## NDSSName.my.fctrStyles##Fashion:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg 62.45895
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg 62.45737
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg 62.44630
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg 62.44657
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg 62.45255
## PubDate.wkday.fctr5 62.24996
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg 62.43340
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg 62.43254
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg 62.42546
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg 62.41722
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg 62.39835
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg 62.40018
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg 62.40078
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg 62.39911
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg 62.45604
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg 62.43364
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg 62.42450
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg 62.41764
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg 62.41912
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg 62.41424
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg 62.37735
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg 62.37645
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg 62.39674
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg 62.33894
## NDSSName.my.fctrStyles##Fashion 61.38622
## NDSSName.my.fctrForeign#World#AsiaPacific 61.85382
## NDSSName.my.fctr#U.S.#Education 61.72771
## NDSSName.my.fctrTStyle## 61.91481
## NDSSName.my.fctr#Opinion#RoomForDebate 60.55380
## Final__rcv_glmnet.imp
## PubDate.day.minutes.poly.1 100.00000
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.3.ctg 76.35014
## PubDate.day.minutes.poly.2 94.99861
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.5.ctg 79.92646
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.2.ctg 68.23193
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.1.ctg 66.33201
## PubDate.day.minutes.poly.4 80.32362
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrScience#Health# 67.75545
## NDSSName.my.fctrOpEd#Opinion# 66.72750
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor 68.43180
## NDSSName.my.fctrBusiness#Crosswords/Games# 65.82644
## NDSSName.my.fctrStyles#U.S.# 65.72658
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.4.ctg 69.33717
## WordCount.log1p 63.35579
## PubDate.wkend 63.40967
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last2.log1p.ctg 62.71138
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last2.log1p.ctg 62.60862
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last4.log1p.ctg 62.63968
## NDSSName.my.fctrStyles#U.S.#:PubDate.last2.log1p.ctg 62.69663
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last4.log1p.ctg 62.63671
## NDSSName.my.fctrStyles#U.S.#:PubDate.last4.log1p.ctg 62.70422
## NDSSName.my.fctrScience#Health#:PubDate.last2.log1p.ctg 62.63024
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last8.log1p.ctg 62.60155
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last8.log1p.ctg 62.49439
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last16.log1p.ctg 62.63125
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last16.log1p.ctg 62.51548
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last32.log1p.ctg 62.64524
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last16.log1p.ctg 62.55716
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last8.log1p.ctg 62.48485
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.last32.log1p.ctg 62.59625
## NDSSName.my.fctrScience#Health#:PubDate.last4.log1p.ctg 62.52533
## NDSSName.my.fctrOpEd#Opinion#:PubDate.last32.log1p.ctg 62.55673
## NDSSName.my.fctrScience#Health#:PubDate.last8.log1p.ctg 62.53825
## NDSSName.my.fctrStyles#U.S.#:PubDate.last8.log1p.ctg 62.46260
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.last32.log1p.ctg 62.49079
## NDSSName.my.fctrBusiness#Technology# 62.46064
## WordCount.root2 62.61255
## NDSSName.my.fctrStyles#U.S.#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.last32.log1p.ctg 62.46064
## PubDate.hour.fctr(15.3,23] 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last2.log1p.ctg 62.51309
## PubDate.last2.log1p 62.46192
## NDSSName.my.fctrBusiness#Technology#:PubDate.last4.log1p.ctg 62.52431
## PubDate.last4.log1p 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last8.log1p.ctg 62.49862
## PubDate.day.minutes.poly.3 69.56878
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.3.ctg 62.66614
## PubDate.last16.log1p 62.59083
## NDSSName.my.fctr##:PubDate.last32.log1p.ctg 62.49753
## PubDate.wkday.fctr1 62.47973
## NDSSName.my.fctr##:PubDate.last16.log1p.ctg 62.47326
## .rnorm 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctr##:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#Opinion#ThePublicEditor:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctr#U.S.#Education:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#Dealbook:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#Crosswords/Games#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrBusiness#Technology#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture## 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrCulture##:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts# 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrCulture#Arts#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrForeign#World#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region# 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last4.log1p.ctg 62.46064
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrOpEd#Opinion#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrScience#Health#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrStyles##Fashion:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrStyles#U.S.#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrTStyle##:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrTravel#Travel# 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last16.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrTravel#Travel#:PubDate.last8.log1p.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.1.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.2.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.3.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.4.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.day.minutes.poly.5.ctg 62.46064
## NDSSName.my.fctrmyOther:PubDate.last32.log1p.ctg 62.46064
## PubDate.date.fctr(13,19] 62.46064
## PubDate.date.fctr(19,25] 62.46064
## PubDate.date.fctr(25,31] 62.46064
## PubDate.date.fctr(7,13] 62.46064
## PubDate.day.minutes.poly.5 62.46064
## PubDate.hour.fctr(7.67,15.3] 62.46064
## PubDate.last32.log1p 62.46064
## PubDate.last8.log1p 62.46064
## PubDate.minute.fctr(14.8,29.5] 62.46064
## PubDate.minute.fctr(44.2,59.1] 62.46064
## PubDate.month.fctr10 62.46064
## PubDate.month.fctr11 62.46064
## PubDate.month.fctr12 62.46064
## PubDate.second.fctr(14.8,29.5] 62.46064
## PubDate.second.fctr(29.5,44.2] 62.46064
## PubDate.second.fctr(44.2,59.1] 62.46064
## PubDate.wkday.fctr2 62.46064
## PubDate.wkday.fctr3 62.46064
## PubDate.wkday.fctr4 62.46064
## PubDate.wkday.fctr6 62.46064
## WordCount.nexp 62.46064
## NDSSName.my.fctr#Multimedia#:PubDate.last16.log1p.ctg 62.46061
## NDSSName.my.fctrmyOther:PubDate.last16.log1p.ctg 62.46034
## PubDate.juliandate 62.46014
## NDSSName.my.fctr#Multimedia#:PubDate.last4.log1p.ctg 62.46001
## NDSSName.my.fctrForeign#World#:PubDate.last2.log1p.ctg 62.45953
## NDSSName.my.fctrmyOther:PubDate.last8.log1p.ctg 62.45872
## NDSSName.my.fctrmyOther:PubDate.last2.log1p.ctg 62.45866
## NDSSName.my.fctrForeign#World#:PubDate.last16.log1p.ctg 62.45805
## NDSSName.my.fctrTravel#Travel#:PubDate.last4.log1p.ctg 62.45737
## NDSSName.my.fctrmyOther:PubDate.last4.log1p.ctg 62.44734
## NDSSName.my.fctrForeign#World#:PubDate.last4.log1p.ctg 62.44664
## NDSSName.my.fctrForeign#World#:PubDate.last8.log1p.ctg 62.44232
## NDSSName.my.fctr#Multimedia#:PubDate.last2.log1p.ctg 62.44227
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last4.log1p.ctg 62.43957
## NDSSName.my.fctrBusiness#BusinessDay#SmallBusiness:PubDate.last2.log1p.ctg 62.41173
## PubDate.minute.fctr(29.5,44.2] 62.31764
## NDSSName.my.fctr#Multimedia# 62.18062
## NDSSName.my.fctrmyOther 62.14866
## NDSSName.my.fctrForeign#World# 62.11162
## NDSSName.my.fctrCulture#Arts#:PubDate.day.minutes.poly.4.ctg 60.96200
## NDSSName.my.fctrMetro#N.Y./Region#:PubDate.day.minutes.poly.2.ctg 59.93848
## NDSSName.my.fctrStyles##Fashion:PubDate.last32.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last32.log1p.ctg 62.45895
## NDSSName.my.fctrStyles##Fashion:PubDate.last16.log1p.ctg 62.45737
## NDSSName.my.fctr#U.S.#Education:PubDate.last32.log1p.ctg 62.44630
## NDSSName.my.fctrStyles##Fashion:PubDate.last8.log1p.ctg 62.44657
## NDSSName.my.fctrStyles##Fashion:PubDate.last4.log1p.ctg 62.45255
## PubDate.wkday.fctr5 62.24996
## NDSSName.my.fctrStyles##Fashion:PubDate.last2.log1p.ctg 62.46064
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last16.log1p.ctg 62.43340
## NDSSName.my.fctr#U.S.#Education:PubDate.last16.log1p.ctg 62.43254
## NDSSName.my.fctr#U.S.#Education:PubDate.last8.log1p.ctg 62.42546
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last8.log1p.ctg 62.41722
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last4.log1p.ctg 62.39835
## NDSSName.my.fctrForeign#World#AsiaPacific:PubDate.last2.log1p.ctg 62.40018
## NDSSName.my.fctr#U.S.#Education:PubDate.last4.log1p.ctg 62.40078
## NDSSName.my.fctr#U.S.#Education:PubDate.last2.log1p.ctg 62.39911
## NDSSName.my.fctrTStyle##:PubDate.last32.log1p.ctg 62.45604
## NDSSName.my.fctrTStyle##:PubDate.last16.log1p.ctg 62.43364
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last32.log1p.ctg 62.42450
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last16.log1p.ctg 62.41764
## NDSSName.my.fctrTStyle##:PubDate.last8.log1p.ctg 62.41912
## NDSSName.my.fctrTStyle##:PubDate.last4.log1p.ctg 62.41424
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last8.log1p.ctg 62.37735
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last4.log1p.ctg 62.37645
## NDSSName.my.fctrTStyle##:PubDate.last2.log1p.ctg 62.39674
## NDSSName.my.fctr#Opinion#RoomForDebate:PubDate.last2.log1p.ctg 62.33894
## NDSSName.my.fctrStyles##Fashion 61.38622
## NDSSName.my.fctrForeign#World#AsiaPacific 61.85382
## NDSSName.my.fctr#U.S.#Education 61.72771
## NDSSName.my.fctrTStyle## 61.91481
## NDSSName.my.fctr#Opinion#RoomForDebate 60.55380
# Display out-of-bag (OOB) diagnostics of the selected model for one category.
#
# For the rows of glbObsOOB whose myCategory column equals `myCategory`,
# prints in order:
#   1. the transposed confusion-matrix table of the glb_sel_mdl_id predictions
#      against glb_rsp_var,
#   2. the sum of predct_accurate_var_name divided by the number of rows,
#   3. the mis-predicted rows with Popular == 1, labelled "FN errors",
#   4. the mis-predicted rows with Popular == 0, labelled "FP errors".
#
# Args:
#   myCategory: value compared (with ==) against glbObsOOB$myCategory.
#
# Reads the globals glbObsOOB, glb_sel_mdl_id, glb_rsp_var, glb_id_var and
# predct_accurate_var_name. The error listings rely on the Popular,
# .clusterid, Headline, Snippet and Abstract columns of glbObsOOB.
# Called for its printed output.
dsp_myCategory_conf_mtrx <- function(myCategory) {
  print(sprintf("%s OOB::myCategory=%s confusion matrix & accuracy: ",
                glb_sel_mdl_id, myCategory))

  # Evaluate the category filter once instead of re-deriving it per subset.
  in_ctgry <- glbObsOOB$myCategory == myCategory
  ctgry_df <- glbObsOOB[in_ctgry, ]
  pred_col <- paste0(mygetPredictIds(glb_rsp_var)$value, glb_sel_mdl_id)

  print(t(confusionMatrix(ctgry_df[, pred_col],
                          ctgry_df[, glb_rsp_var])$table))
  print(sum(ctgry_df[, predct_accurate_var_name]) / nrow(ctgry_df))

  err_ids <- glbObsOOB[in_ctgry & (!glbObsOOB[, predct_accurate_var_name]),
                       glb_id_var]
  # Match on glb_id_var, the same column err_ids was read from, rather than a
  # hard-coded UniqueID column.
  is_err <- glbObsOOB[, glb_id_var] %in% err_ids
  err_cols <- c(".clusterid", "Popular", "Headline", "Snippet", "Abstract")

  # Print the count and the rows of the mis-predictions whose Popular value
  # equals `popular_val`; `err_lbl` ("FN" / "FP") only affects the heading.
  dsp_errs <- function(popular_val, err_lbl) {
    err_df <- glbObsOOB[is_err & (glbObsOOB$Popular == popular_val), err_cols]
    print(sprintf(paste0("%s OOB::myCategory=%s ", err_lbl, " errors: %d"),
                  glb_sel_mdl_id, myCategory, nrow(err_df)))
    print(err_df)
  }

  dsp_errs(1, "FN")
  dsp_errs(0, "FP")
}
#dsp_myCategory_conf_mtrx(myCategory="OpEd#Opinion#")
#dsp_myCategory_conf_mtrx(myCategory="Business#Business Day#Dealbook")
#dsp_myCategory_conf_mtrx(myCategory="##")
# if (glb_is_classification) {
# print("FN_OOB_ids:")
# print(glbObsOOB[glbObsOOB$UniqueID %in% FN_OOB_ids,
# grep(glb_rsp_var, names(glbObsOOB), value=TRUE)])
# print(glbObsOOB[glbObsOOB$UniqueID %in% FN_OOB_ids,
# glbFeatsText])
# print(dsp_vctr <- colSums(glbObsOOB[glbObsOOB$UniqueID %in% FN_OOB_ids,
# setdiff(grep("[HSA].", names(glbObsOOB), value=TRUE),
# union(myfind_chr_cols_df(glbObsOOB),
# grep(".fctr", names(glbObsOOB), fixed=TRUE, value=TRUE)))]))
# }
# Summarise the glb_fin_mdl_id predictions stored in glbObsNew: a histogram of
# the predicted column for regression, a frequency table for classification.
print("glbObsNew prediction stats:")
## [1] "glbObsNew prediction stats:"
if (glb_is_regression) {
  print(
    myplot_histogram(glbObsNew,
                     mygetPredictIds(glb_rsp_var, glb_fin_mdl_id)$value)
  )
}
if (glb_is_classification) {
  print(
    table(glbObsNew[, mygetPredictIds(glb_rsp_var, glb_fin_mdl_id)$value])
  )
}
##
## N Y
## 1181 689
# Use this to see how prediction changes by changing one or more values
# players_df <- data.frame(id=c("Chavez", "Giambi", "Menechino", "Myers", "Pena"),
# OBP=c(0.338, 0.391, 0.369, 0.313, 0.361),
# SLG=c(0.540, 0.450, 0.374, 0.447, 0.500),
# cost=c(1400000, 1065000, 295000, 800000, 300000))
# players_df$RS.predict <- predict(glb_models_lst[[csm_mdl_id]], players_df)
# print(orderBy(~ -RS.predict, players_df))
# dsp_chisq.test(Headline.contains="[Vi]deo")
# Check that every column of glbObsTrn, glbObsFit, glbObsOOB and glbObsNew
# also exists in glbObsAll. On the first data frame that carries extra
# columns, print its name and the offending columns, then abort.
glb_obs_subsets <- list(glbObsTrn = glbObsTrn, glbObsFit = glbObsFit,
                        glbObsOOB = glbObsOOB, glbObsNew = glbObsNew)
for (obs_name in names(glb_obs_subsets)) {
  # Named extra_cols (not diff) so the base function's name is not reused.
  extra_cols <- setdiff(names(glb_obs_subsets[[obs_name]]), names(glbObsAll))
  if (length(extra_cols) > 0) {
    print(sprintf("%s columns not in glbObsAll:", obs_name))
    print(extra_cols)
    stop("glbObs* not in sync")
  }
}
# When glb_save_envir is TRUE, write the objects named below to
# "<glb_out_pfx>prdnew_dsk.RData".
if (glb_save_envir) {
  save(
    list = c(
      "glb_feats_df", "glbObsAll",
      # "glbObsTrn", "glbObsFit", "glbObsOOB", "glbObsNew",
      "glb_models_df", "dsp_models_df", "glb_models_lst", "glb_model_type",
      "glb_sel_mdl", "glb_sel_mdl_id",
      "glb_fin_mdl", "glb_fin_mdl_id"
    ),
    file = paste0(glb_out_pfx, "prdnew_dsk.RData")
  )
}
# tmp_replay_lst <- replay.petrisim(pn=glb_analytics_pn,
# replay.trans=(glb_analytics_avl_objs <- c(glb_analytics_avl_objs,
# "data.new.prediction")), flip_coord=TRUE)
# print(ggplot.petrinet(tmp_replay_lst[["pn"]]) + coord_flip())
# Add the "display.session.info" step to the glb_chunks_df timing table,
# requesting a major-step increment.
glb_chunks_df <- myadd_chunk(
  glb_chunks_df,
  "display.session.info",
  major.inc = TRUE
)
## label step_major step_minor label_minor bgn end
## 16 predict.data.new 8 0 0 814.509 834.656
## 17 display.session.info 9 0 0 834.656 NA
## elapsed
## 16 20.147
## 17 NA
Null Hypothesis (\(\sf{H_{0}}\)): mpg is not impacted by am_fctr.
The variance by am_fctr appears to be independent.
#```{r q1, cache=FALSE}
# print(t.test(subset(cars_df, am_fctr == "automatic")$mpg,
#              subset(cars_df, am_fctr == "manual")$mpg,
#              var.equal=FALSE)$conf)
#```
# We reject the null hypothesis, i.e., we have evidence to conclude that am_fctr impacts mpg (95% confidence). Manual transmission is better for miles per gallon than automatic transmission.
## label step_major step_minor label_minor bgn
## 14 fit.data.training 7 0 0 357.541
## 10 fit.models 6 0 0 47.064
## 11 fit.models 6 1 1 246.208
## 9 select.features 5 0 0 22.267
## 16 predict.data.new 8 0 0 814.509
## 12 fit.models 6 2 2 336.921
## 15 fit.data.training 7 1 1 804.100
## 5 extract.features 3 0 0 13.016
## 13 fit.models 6 3 3 351.392
## 1 import.data 1 0 0 5.077
## 2 inspect.data 2 0 0 9.058
## 8 partition.data.training 4 0 0 20.916
## 6 manage.missing.data 3 1 1 19.535
## 3 scrub.data 2 1 1 11.941
## 7 cluster.data 3 2 2 20.595
## 4 transform.data 2 2 2 12.709
## end elapsed duration
## 14 804.099 446.559 446.558
## 10 246.207 199.143 199.143
## 11 336.920 90.713 90.712
## 9 47.064 24.797 24.797
## 16 834.656 20.147 20.147
## 12 351.391 14.471 14.470
## 15 814.508 10.409 10.408
## 5 19.534 6.518 6.518
## 13 357.541 6.149 6.149
## 1 9.058 3.981 3.981
## 2 11.941 2.883 2.883
## 8 22.267 1.351 1.351
## 6 20.595 1.060 1.060
## 3 12.708 0.767 0.767
## 7 20.915 0.320 0.320
## 4 13.016 0.307 0.307
## [1] "Total Elapsed Time: 834.656 secs"